1 Table of Contents

2 Differential Expression

2.1 Preparations

2.1.1 Libraries

library(knitr) 
library(pacman)

p_load(plyr,
       tidyverse,
       VennDiagram,
       pheatmap,
       gplots,
       gridExtra,
       EnhancedVolcano,
       venn,
       xlsx,
       gprofiler2,
       corrplot,
       PerformanceAnalytics,
       Hmisc,
       edgeR,
       limma,
       Glimma,
       gplots,
       org.Mm.eg.db,
       RColorBrewer)

p_load_gh("kassambara/ggpubr")
source("./scripts/TAMPOR.R") 

2.1.2 Data

Mayo_Proteomics_TC_traits <- read.csv("./data_prot/Mayo_Proteomics_TC_traits.csv",
                                      sep = ",",
                                      header = TRUE,
                                      stringsAsFactors = FALSE) 
Mayo_Proteomics_TC_proteinoutput <- read.table("./data_prot/Mayo_Proteomics_TC_proteinoutput.txt",
                                               sep = "\t",
                                               header = TRUE,
                                               stringsAsFactors = FALSE) 

Mayo_Proteomics_TC_proteinoutput has dimensions: 6585, 2105.

2.1.3 Cleanup of Contaminants and Reverse

Format of the colnames: “LFQ.intensity.mayo_b5_egis_45”

## Fitler out Potential.contaminant and Reverse "+"
dat <- Mayo_Proteomics_TC_proteinoutput %>%
  filter(Potential.contaminant != "+") %>%
  filter(Reverse != "+") 

Now ours data has dimensions: 6335, 2105.

2.1.4 Set row names to Protein.IDs

rownames(dat) <- dat$Protein.IDs 

2.1.5 Select LFQ + mgis columns only

## Select LFQ + egis columns only
dat.m <- dat %>%
  dplyr::select(starts_with("LFQ")) %>%
  dplyr::select(!contains("egis"))

dim(dat.m) 
## [1] 6335  215

There are no missing values in our dataset but plenty of zeroes.

There are 3448027 zeroes.

Traits file:

  • lines 1:200 – samples
  • lines 201:215 – egis samples
  • lines 216:230 – mgis samples
traits <- Mayo_Proteomics_TC_traits[1:4] 

Rownames to satisfy TAMPOR’s conventions

rownames(traits) <- traits$Samples_Simple %>%
  gsub("_", "\\.", .)

traits.e <- traits[1:215, ] # egis
traits.m <- traits[c(1:200, 216:230), ] # mgis 

2.1.5.1 mgis data

Renaming columns to match the traits

dat.mt <- dat.m

## Renaming scheme:
## egis samples: "LFQ.intensity.mayo_b5_egis_24" -> "b5.24"
## non-egis:     "LFQ.intensity.mayo_b5_197_41" -> "b5.197"
colnames(dat.mt) <- dat.m %>%
  names %>%
  gsub("LFQ.intensity.mayo_", "", .) %>%
  gsub("mgis_", "", .) %>%
  gsub("([0-9]{3})_[0-9]{2}$", "\\1", .) %>% # to strip 2 last digits from non egis samples only
  gsub("_", "\\.", .) 

To make identical column and row names

dat.mt <- dat.mt[, rownames(traits.m)] 

2.2 TAMPOR

Cleanup

## see: https://github.com/edammer/TAMPOR/issues/2
dat.mt[dat.mt<=0]<- NA 

2.2.1 mgis useAllNonGIS=TRUE

## ## ## Samples designated as GIS were removed before visualization of variance and MDS
## TAMPORlist.MGIS.all.true <- TAMPOR(dat.mt, traits.m,
##                                  noGIS=FALSE,
##                                  useAllNonGIS=TRUE, # This assumes
##                                                     # batches are
##                                                     # generally
##                                                     # trait-balanced
##                                                     # and randomized,
##                                                     # but GIS and
##                                                     # non-GIS samples
##                                                     # can be grossly
##                                                     # different.
##                                  batchPrefixInSampleNames=TRUE,
##                                  GISchannels=c("01","23", "44", "45"),
##                                  samplesToIgnore = NULL,
##                                  removeGISafter = TRUE,
##                                  parallelThreads=2,
##                                  outputSuffix="MGIS")

## saveRDS(TAMPORlist.MGIS.all.true, "./results_prot/TAMPORlist.MGIS.all.true.rds")


TAMPORlist.MGIS.all.true <- readRDS("./results_prot/TAMPORlist.MGIS.all.true.rds")


## plotMDS(TAMPORlist.GIS$cleanRelAbun %>% log2, col = labels2colors(colnames(TAMPORlist.GIS$cleanRelAbun)))

cra <- TAMPORlist.MGIS.all.true$cleanRelAbun

cra.log <- log2(cra)

2.2.2 PCA of conditions

conditions.col <- traits$Diagnosis[1:200] %>%
  sub("Control", "green", .) %>%
  sub("AD", "red", .) %>%
  sub("PSP", "blue", .)

traw.log <- log2(dat.mt[1:200])
colnames(traw.log) <- traits[1:200,4] %>% make.unique

tcra.log <- cra.log
colnames(tcra.log) <- traits[1:200,4] %>% make.unique

require(limma)

## opar <- par(no.readonly=TRUE)
png("./figs_prot/mds_tampor_conditions2.png", height = 2000, width = 6000, units = "px")
## tiff("./figs_prot/mds_tampor_conditions2.tiff", height = 2000, width = 6000, units = "px")
par(mfrow = c(1, 2), cex = 5)

plotMDS(traw.log, col = conditions.col)
title("log2 (raw)")

plotMDS(tcra.log, col = conditions.col)
title("After TAMPOR (mgis)")

dev.off()
## png 
##   2
## par(opar)
## dev.off()

rm(traw.log, tcra.log)
Number of samples: AD = 84, PSP = 85, Control = 31

Figure 2.1: Number of samples: AD = 84, PSP = 85, Control = 31

2.3 FC and adjusted p-values cutoffs

pv_cut <- 5e-2
fc_cut <- 0.3 

2.4 DE genes

I used this source making the necessary additions and changes: Data Science Workshop British Society for Proteomic Research Meeting 2018.

2.4.1 Impute missing values

Impute the data by replacing the missing values with the median observation for each protein under each condition.

traits <- Mayo_Proteomics_TC_traits %>%
  filter(Diagnosis != "EmoryGlobalInternalStandard" &
           Diagnosis != "MayoGlobalInternalStandard") %>%
  dplyr::select(1:4)


control_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "Control") %>%
  .$n

ad_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "AD") %>%
  .$n

psp_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "PSP") %>%
  .$n

## mean normalisation
## replace_with_mean <- function(x) replace(x, x==0, mean(x))
## median normalisation
replace_with_median <- function(x) replace(x, is.na(x), median(x, na.rm = TRUE))

##
crad <- as.data.frame(cra)
## crad <- as.data.frame(cra[-c(23, 53, 84)])

control <- crad %>%
  dplyr::select(all_of(control_cols)) %>%
  apply(., 1, replace_with_median) %>%
  t %>%
  data.frame

ad <- crad %>%
  dplyr::select(all_of(ad_cols)) %>%
  apply(., 1, replace_with_median) %>%
  t %>%
  data.frame

psp <- crad %>%
  dplyr::select(all_of(psp_cols)) %>%
  apply(., 1, replace_with_median) %>%
  t %>%
  data.frame

data_imp <- cbind.data.frame(control, ad, psp) 

2.4.2 Quantile normalization

# Quantile normalisation : the aim is to give different distributions the
# same statistical properties
quantile_normalisation <- function(df){

  # Find rank of values in each column
  df_rank <- map_df(df,rank,ties.method="average")
  # Sort observations in each column from lowest to highest
  df_sorted <- map_df(df,sort)
  # Find row mean on sorted columns
  df_mean <- rowMeans(df_sorted)

  # Function for substiting mean values according to rank
  index_to_mean <- function(my_index, my_mean){
    return(my_mean[my_index])
  }

  # Replace value in each column with mean according to rank
  df_final <- map_df(df_rank,index_to_mean, my_mean=df_mean)

  return(df_final)
} 
data_norm <- data_imp %>%
  quantile_normalisation() 
data_imp %>% ggplot() +
  geom_density(aes(b1.001 %>% log2), col = "red") +
  geom_density(aes(b2.002 %>% log2), col = "green") +
  geom_density(aes(b3.011 %>% log2), col = "blue") +
  geom_density(aes(b4.005 %>% log2), col = "magenta") +
  geom_density(aes(b5.175 %>% log2)) +
  ggtitle("Dstribution before normalization (5 samples from different batches)") +
  xlab("log2 Intensity") +
  ylab("Density")

data_norm %>% ggplot() +
  geom_density(aes(b1.001 %>% log2), col = "red", lwd=2.5) +
  geom_density(aes(b2.002 %>% log2), col = "green", lwd=2) +
  geom_density(aes(b3.011 %>% log2), col = "blue", lwd = 1.5) +
  geom_density(aes(b4.005 %>% log2), col = "magenta", lwd = 1) +
  geom_density(aes(b5.175 %>% log2), lwd=0.5) +
  ggtitle("Dstribution after normalization (The same 5 samples)") +
  xlab("log2 Intensity") +
  ylab("Density") 

2.4.3 cra rearranged

traits <- Mayo_Proteomics_TC_traits %>%
  filter(Diagnosis != "EmoryGlobalInternalStandard" &
           Diagnosis != "MayoGlobalInternalStandard") %>%
  dplyr::select(1:4)


control_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "Control") %>%
  .$n

ad_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "AD") %>%
  .$n

psp_cols <- traits %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "PSP") %>%
  .$n

## rearrange columns in cra (optional)
cra <- cra %>%
  data.frame %>%
  dplyr::select(all_of(c(control_cols, ad_cols, psp_cols))) 

2.4.4 Design matrix and linear model fitting

2.4.4.1 Create the design matrix

diagnosis <- c(rep("Control", length(control_cols)), rep("AD", length(ad_cols)), rep("PSP", length(psp_cols)))
design <- model.matrix( ~ diagnosis + 0) 

2.4.4.2 Fit linear model

data_norm <- data.frame(data_norm)
rownames(data_norm) <- rownames(cra) %>% sub("^.+\\|.+\\|(.+)_HUMAN.?", "\\1", .) %>% make.unique

require(limma)
fit <- lmFit(data_norm %>% log2, design) 

2.4.5 Significantly DE proteins: AD vs Control

Build a contrast AD vs Control

AvsC <- makeContrasts(diagnosisAD - diagnosisControl, levels=design) 
fit_AvsC <-contrasts.fit(fit, AvsC)

bf_AvsC <- eBayes(fit_AvsC)

bf_AvsC %>% decideTests %>% summary 
##        diagnosisAD - diagnosisControl
## Down                              405
## NotSig                           3219
## Up                                321

Thus, 405 proteins are significantly downregulated and 321 upregulated in AD versus Control samples (log2 FC cutoff is not applied here).

AvsC_top <- topTable(bf_AvsC, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)

write.csv(AvsC_top, "./results_prot/prot_ad_vs_control.csv")

knitr::kable(AvsC_top,
             caption = "Top DE proteins: AD vs Control")
Table 2.1: Top DE proteins: AD vs Control
logFC AveExpr t P.Value adj.P.Val B
CO1A1 1.6130 27.50 4.261 0.0000 0.0008 2.1024
H0YD17 1.1071 27.56 4.353 0.0000 0.0006 2.4561
H0YL34 1.0660 28.03 4.872 0.0000 0.0001 4.5628
K7ES72 0.9553 26.02 3.236 0.0014 0.0132 -1.3885
HCN4 -0.9520 29.10 -3.557 0.0005 0.0060 -0.3824
C9J6J8 0.9262 26.19 4.712 0.0000 0.0002 3.8926
B5MCB5 0.8631 27.33 4.321 0.0000 0.0007 2.3334
CO4A 0.8441 29.15 5.636 0.0000 0.0000 7.9868
TAU.1 0.8439 30.91 6.673 0.0000 0.0000 13.1880
A6NMN0 0.8327 29.33 7.280 0.0000 0.0000 16.4879
H7C545 0.8286 29.01 6.306 0.0000 0.0000 11.2781
H0Y7G9 0.8094 26.57 6.075 0.0000 0.0000 10.1172
Q05BJ3 -0.7949 30.38 -4.244 0.0000 0.0008 2.0389
H0YG30 0.6852 27.10 8.127 0.0000 0.0000 21.3489
D6R9C5 0.6769 26.82 3.713 0.0003 0.0040 0.1342
C9JTJ8 0.6726 27.91 5.266 0.0000 0.0000 6.2818
C9IZ15 -0.6573 27.43 -3.123 0.0021 0.0167 -1.7208
K7EKD1 0.6552 28.46 4.238 0.0000 0.0008 2.0156
AB17A 0.6314 23.66 5.806 0.0000 0.0000 8.7998
IP3KB 0.6299 25.15 5.458 0.0000 0.0000 7.1575
K7EP04 0.5829 27.43 4.140 0.0001 0.0012 1.6486
A0A096LP69 0.5665 27.33 4.074 0.0001 0.0014 1.4060
F5H100 0.5464 27.74 3.188 0.0017 0.0147 -1.5287
A0A087WUA0 0.5431 30.80 2.672 0.0082 0.0454 -2.9413
PLEC 0.5368 25.82 6.567 0.0000 0.0000 12.6298
B0YJC5 0.5249 34.21 4.009 0.0001 0.0017 1.1692
C9J3N8 0.5035 31.71 5.258 0.0000 0.0000 6.2424
KV206 0.5021 25.12 4.628 0.0000 0.0003 3.5468
NPY 0.4981 26.80 4.750 0.0000 0.0002 4.0484
H3BV46 -0.4979 24.93 -4.868 0.0000 0.0001 4.5440
TCPR2 -0.4911 25.44 -2.932 0.0038 0.0264 -2.2598
VAMP3 0.4629 25.34 6.918 0.0000 0.0000 14.5012
NPTX2 -0.4626 25.92 -3.464 0.0007 0.0075 -0.6842
E9PNM5 -0.4625 27.86 -5.402 0.0000 0.0000 6.8978
H0YFX9 0.4623 30.41 6.542 0.0000 0.0000 12.4984
PGAM2 0.4611 27.85 2.851 0.0048 0.0315 -2.4793
C9JH18 -0.4596 33.84 -2.695 0.0076 0.0433 -2.8851
PLCA -0.4572 25.84 -2.676 0.0081 0.0451 -2.9326
Q5TBU2 0.4468 29.49 3.218 0.0015 0.0137 -1.4421
VPS39 0.4460 26.33 2.744 0.0066 0.0391 -2.7580
H3BP73 0.4446 27.71 4.356 0.0000 0.0006 2.4674
B5MC71 0.4195 26.82 3.560 0.0005 0.0060 -0.3742
UBCP1 -0.4177 24.84 -4.133 0.0001 0.0012 1.6243
F5GXC1 0.4143 26.25 3.030 0.0028 0.0208 -1.9854
F5GYV6 0.4097 32.18 3.070 0.0024 0.0190 -1.8725
BATF3 -0.4089 27.10 -4.397 0.0000 0.0006 2.6297
A0A0C4DGW9 0.4078 26.73 4.326 0.0000 0.0007 2.3515
H7C2Q0 0.4036 26.52 4.661 0.0000 0.0003 3.6822
X6RHB9 0.4028 27.84 6.056 0.0000 0.0000 10.0227
KGP2 0.4007 24.85 2.789 0.0058 0.0361 -2.6422
KCC2A 0.3976 30.01 4.493 0.0000 0.0004 3.0054
CXB6 -0.3946 27.40 -3.745 0.0002 0.0036 0.2417
H0YDG5 0.3935 27.68 4.709 0.0000 0.0002 3.8814
PIR 0.3906 25.55 3.491 0.0006 0.0071 -0.5961
ARAID 0.3882 37.13 3.937 0.0001 0.0021 0.9114
H7BZ97 0.3876 24.96 4.089 0.0001 0.0014 1.4628
B8ZZQ4 -0.3876 26.83 -4.476 0.0000 0.0004 2.9390
NDUV3.1 -0.3875 26.80 -2.841 0.0050 0.0322 -2.5053
A0A087WWT2 -0.3844 26.11 -3.805 0.0002 0.0031 0.4496
E9PQN5 -0.3836 25.91 -5.923 0.0000 0.0000 9.3660
GTD2A -0.3802 25.47 -3.478 0.0006 0.0073 -0.6368
G5E968 0.3754 29.60 4.617 0.0000 0.0003 3.5039
C9JFK9 0.3741 28.17 3.282 0.0012 0.0118 -1.2495
H7C0X8 0.3737 28.31 3.876 0.0001 0.0026 0.6950
B4DM24 0.3686 32.47 2.753 0.0064 0.0385 -2.7355
E5RG36CON__P17697 0.3671 31.65 5.746 0.0000 0.0000 8.5113
S4R3A2 0.3642 29.93 4.802 0.0000 0.0002 4.2662
ACTN2 -0.3618 26.14 -5.284 0.0000 0.0000 6.3627
SYT10 0.3604 26.36 4.514 0.0000 0.0004 3.0875
PADI2 0.3594 31.34 4.298 0.0000 0.0007 2.2458
S4R300 -0.3569 26.47 -3.531 0.0005 0.0065 -0.4681
F8VRJ1 -0.3564 31.25 -4.168 0.0000 0.0011 1.7529
A0A087X134 0.3560 32.85 5.787 0.0000 0.0000 8.7089
K7EP01 -0.3559 31.59 -3.550 0.0005 0.0062 -0.4069
D6RJA6 0.3539 29.16 4.542 0.0000 0.0003 3.2002
F2Z2Z8 0.3518 29.65 5.157 0.0000 0.0000 5.7956
F8VRU3 -0.3511 29.88 -4.224 0.0000 0.0009 1.9640
FABP7 0.3505 26.91 2.929 0.0038 0.0264 -2.2665
A0A0A0MTN3 0.3497 32.42 4.083 0.0001 0.0014 1.4403
PLXA2 -0.3492 25.44 -2.911 0.0040 0.0277 -2.3176
F8WC57 -0.3484 31.61 -3.798 0.0002 0.0032 0.4249
F5H2X6 0.3467 27.11 2.844 0.0049 0.0320 -2.4974
PYGL 0.3445 24.12 3.684 0.0003 0.0044 0.0367
GBG12 0.3443 29.67 4.903 0.0000 0.0001 4.6926
E9PHY3 0.3442 26.96 5.071 0.0000 0.0001 5.4178
K7ERU7 0.3366 24.53 3.626 0.0004 0.0050 -0.1573
LRC73 -0.3340 25.27 -4.153 0.0000 0.0011 1.6964
C9J5D8 -0.3331 25.82 -4.587 0.0000 0.0003 3.3832
STMN2 0.3294 30.56 3.660 0.0003 0.0047 -0.0443
Q5JXI0 0.3267 32.03 5.053 0.0000 0.0001 5.3368
NTNG2 -0.3264 24.18 -5.037 0.0000 0.0001 5.2669
SNTB1 0.3222 26.55 2.801 0.0056 0.0353 -2.6108
H0YJI1 -0.3220 27.91 -5.805 0.0000 0.0000 8.7935
C2C4C -0.3213 26.59 -3.312 0.0011 0.0110 -1.1586
C9J618 -0.3196 25.17 -4.312 0.0000 0.0007 2.2999
H3BNP3 -0.3191 25.54 -3.772 0.0002 0.0034 0.3350
Q5T3N0 0.3161 29.34 2.820 0.0053 0.0337 -2.5598
C9JSB2 0.3149 25.83 5.002 0.0000 0.0001 5.1186
CXA1 0.3135 31.91 3.381 0.0009 0.0092 -0.9447
NDUA1 -0.3133 27.84 -3.631 0.0004 0.0050 -0.1398
ADCK3 -0.3113 26.75 -3.547 0.0005 0.0062 -0.4161
AAKB2 0.3102 27.01 4.635 0.0000 0.0003 3.5762
SCRG1 -0.3093 24.93 -3.892 0.0001 0.0024 0.7518
AHNK 0.3091 31.67 3.658 0.0003 0.0047 -0.0505
C9J6E1 0.3081 24.32 3.431 0.0007 0.0081 -0.7864
GBG4 -0.3058 27.05 -3.309 0.0011 0.0111 -1.1665
C9K0I3 0.3051 24.49 3.153 0.0019 0.0157 -1.6337
E7ERP7 0.3051 30.76 3.254 0.0013 0.0127 -1.3320
E9PMM2 -0.3047 25.37 -2.897 0.0042 0.0286 -2.3559
F5GXR3 0.3045 28.77 4.691 0.0000 0.0002 3.8077
A0A087WYF2 -0.3044 27.08 -4.070 0.0001 0.0014 1.3906
SMAP 0.3029 27.47 3.219 0.0015 0.0137 -1.4392
H3BP03 -0.3028 25.54 -3.908 0.0001 0.0023 0.8078
M0QYX4 -0.3024 30.34 -4.586 0.0000 0.0003 3.3766

2.4.6 Significantly DE proteins: PSP vs Control

Build a contrast PSP vs Control

PvsC <- makeContrasts(diagnosisPSP - diagnosisControl, levels=design) 
fit_PvsC <-contrasts.fit(fit, PvsC)

bf_PvsC <- eBayes(fit_PvsC)

bf_PvsC %>% decideTests %>% summary 
##        diagnosisPSP - diagnosisControl
## Down                               346
## NotSig                            3078
## Up                                 521

Thus, 346 proteins are significantly downregulated and 521 upregulated in PSP versus Control samples (log2 FC cutoff is not applied here).

PvsC_top <- topTable(bf_PvsC, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)

write.csv(PvsC_top, "./results_prot/prot_psp_vs_control.csv")

knitr::kable(PvsC_top,
             caption = "Top DE proteins: PSP vs Control") 
Table 2.2: Top DE proteins: PSP vs Control
logFC AveExpr t P.Value adj.P.Val B
C9JNG9 -1.0261 28.78 -2.655 0.0086 0.0422 -2.9069
H0YD17 -1.0174 27.56 -4.006 0.0001 0.0019 1.2084
F5H5D6 -0.9964 25.49 -3.786 0.0002 0.0032 0.4361
K7ES72 -0.9227 26.02 -3.130 0.0020 0.0155 -1.6311
K7EKD1 -0.7262 28.46 -4.704 0.0000 0.0003 3.8881
CO6A1 -0.7045 27.86 -3.135 0.0020 0.0155 -1.6183
C9JH18 -0.6400 33.84 -3.758 0.0002 0.0034 0.3425
A0A087WWP1 -0.6125 28.23 -3.655 0.0003 0.0044 -0.0024
H7C545 -0.5931 29.01 -4.520 0.0000 0.0006 3.1474
C9JTJ8 0.5914 27.91 4.638 0.0000 0.0004 3.6184
A0A087WU08 0.5771 29.40 2.672 0.0082 0.0409 -2.8654
SYT2 -0.5672 28.24 -2.906 0.0041 0.0252 -2.2572
ARAID -0.5557 37.13 -5.644 0.0000 0.0000 8.0207
H7C1X2 -0.5295 25.32 -3.432 0.0007 0.0079 -0.7217
S10AA -0.5180 26.09 -4.281 0.0000 0.0011 2.2207
CART 0.5100 25.08 2.685 0.0079 0.0398 -2.8338
ANO1 0.5086 28.94 5.873 0.0000 0.0000 9.1119
H12 -0.5082 27.62 -4.073 0.0001 0.0017 1.4486
ZO2 -0.5048 27.01 -3.419 0.0008 0.0081 -0.7625
D6REL8CON__P02676 0.5043 29.44 2.684 0.0079 0.0398 -2.8353
C9JHZ9 0.4883 28.79 4.177 0.0000 0.0014 1.8294
D6R9C5 0.4867 26.82 2.674 0.0081 0.0408 -2.8601
K7ESQ2 -0.4780 27.82 -3.373 0.0009 0.0091 -0.9057
SYNPO 0.4771 30.88 4.099 0.0001 0.0016 1.5441
OPA3 0.4693 26.38 4.654 0.0000 0.0004 3.6828
E9PR42 -0.4606 27.67 -6.107 0.0000 0.0000 10.2576
E9PS65 -0.4592 26.09 -3.186 0.0017 0.0138 -1.4695
E9PLK6 -0.4585 30.49 -6.060 0.0000 0.0000 10.0215
C9JUP6 -0.4559 25.69 -4.129 0.0001 0.0015 1.6530
H0YLE2 -0.4421 30.81 -4.008 0.0001 0.0019 1.2153
A0A087X1B9 -0.4409 25.38 -3.879 0.0001 0.0026 0.7574
C9JA18 0.4380 29.26 4.063 0.0001 0.0017 1.4117
K7EP08 -0.4373 26.26 -4.142 0.0001 0.0014 1.7022
KCC2A 0.4341 30.01 4.913 0.0000 0.0002 4.7561
H0YK70 -0.4298 25.81 -3.361 0.0009 0.0094 -0.9437
A2A2P1 -0.4144 25.08 -3.379 0.0009 0.0090 -0.8863
KV206 0.4118 25.12 3.802 0.0002 0.0031 0.4904
B4DM24 0.4104 32.47 3.071 0.0024 0.0174 -1.8014
Q5T3N0 -0.4099 29.34 -3.662 0.0003 0.0044 0.0209
H1X -0.4071 28.21 -3.561 0.0005 0.0056 -0.3106
STMN2 0.4069 30.56 4.529 0.0000 0.0006 3.1815
KCA10 -0.4062 25.50 -3.843 0.0002 0.0029 0.6334
CAH4 0.4061 28.55 3.790 0.0002 0.0032 0.4509
A0A087WW91 0.4053 25.53 2.575 0.0108 0.0493 -3.1051
CLIC1 -0.4029 26.64 -3.068 0.0025 0.0175 -1.8082
CIRBP -0.3937 27.70 -3.609 0.0004 0.0050 -0.1550
B0YJC5 -0.3932 34.21 -3.008 0.0030 0.0201 -1.9788
F6THM6 0.3928 31.60 3.620 0.0004 0.0049 -0.1199
ASAH1 -0.3879 28.57 -4.835 0.0000 0.0002 4.4283
F5GXC1 -0.3873 26.25 -2.837 0.0050 0.0290 -2.4412
M0QZC9 -0.3859 25.12 -5.392 0.0000 0.0000 6.8590
Q5SZC5 0.3854 33.37 3.678 0.0003 0.0042 0.0738
E9PBF6 -0.3852 27.65 -2.848 0.0049 0.0283 -2.4135
H7C2E7 -0.3838 30.15 -3.421 0.0008 0.0081 -0.7560
X6RHB9 0.3830 27.84 5.767 0.0000 0.0000 8.6012
RBM3 -0.3828 25.14 -3.073 0.0024 0.0174 -1.7950
E9PFT6 0.3805 31.34 3.186 0.0017 0.0138 -1.4696
D6RAF1 -0.3774 24.56 -3.589 0.0004 0.0052 -0.2208
H0YDG5 0.3729 27.68 4.470 0.0000 0.0006 2.9503
H0YHD8 0.3727 31.42 6.349 0.0000 0.0000 11.4729
E9PMM2 -0.3724 25.37 -3.546 0.0005 0.0058 -0.3593
H0YAT3 -0.3647 24.79 -4.279 0.0000 0.0011 2.2138
C9JFK9 -0.3611 28.17 -3.172 0.0017 0.0141 -1.5085
AHNK -0.3605 31.67 -4.272 0.0000 0.0011 2.1878
CXA1 -0.3596 31.91 -3.885 0.0001 0.0026 0.7772
GTD2A -0.3592 25.47 -3.292 0.0012 0.0109 -1.1549
A0A087WWY6 -0.3587 27.43 -3.912 0.0001 0.0024 0.8734
F5H143 0.3582 30.58 3.488 0.0006 0.0068 -0.5460
A0A087WX08 -0.3572 26.05 -4.964 0.0000 0.0002 4.9733
D6RCD0 -0.3557 25.27 -4.292 0.0000 0.0010 2.2630
H7C4X7 0.3554 28.66 4.807 0.0000 0.0002 4.3132
A2A2A0 -0.3545 28.30 -3.255 0.0013 0.0119 -1.2647
H7C1L2 -0.3542 31.33 -2.928 0.0038 0.0241 -2.1979
K7EQB2 -0.3538 24.67 -3.270 0.0013 0.0116 -1.2201
G8JLE9 -0.3528 28.42 -4.610 0.0000 0.0004 3.5075
MYPT2 0.3514 28.80 2.984 0.0032 0.0213 -2.0454
C9J4M8 0.3511 30.80 3.266 0.0013 0.0116 -1.2321
1A02 -0.3508 27.12 -2.755 0.0064 0.0349 -2.6560
C9JSB2 0.3477 25.83 5.532 0.0000 0.0000 7.5000
F5GYV6 0.3440 32.18 2.582 0.0105 0.0489 -3.0876
RGS14 0.3434 24.91 4.204 0.0000 0.0013 1.9303
Q5QPM2 -0.3414 26.77 -4.669 0.0000 0.0004 3.7442
GID8 0.3405 25.50 3.384 0.0009 0.0089 -0.8710
H3BQC7 0.3371 29.51 3.611 0.0004 0.0050 -0.1486
FKBP5 -0.3369 26.03 -3.881 0.0001 0.0026 0.7653
PADI2 -0.3368 31.34 -4.035 0.0001 0.0018 1.3109
EHD2 -0.3356 26.90 -3.462 0.0007 0.0073 -0.6289
H7C1N8 0.3349 29.12 4.983 0.0000 0.0002 5.0545
C9J7D8 -0.3325 26.24 -3.978 0.0001 0.0020 1.1051
H0Y9Q1 -0.3298 24.23 -2.850 0.0048 0.0282 -2.4085
LAMB4 -0.3288 27.81 -3.641 0.0003 0.0046 -0.0503
J3KRM4 -0.3286 33.04 -4.279 0.0000 0.0011 2.2128
MARF1 0.3261 25.32 5.121 0.0000 0.0001 5.6529
I3L3L3 -0.3250 26.44 -4.197 0.0000 0.0013 1.9035
PDLI5 -0.3214 27.85 -4.627 0.0000 0.0004 3.5745
H0YLR3 -0.3200 27.68 -3.824 0.0002 0.0030 0.5680
E9PS38 0.3183 28.41 3.212 0.0015 0.0130 -1.3912
K7EP01 -0.3179 31.59 -3.176 0.0017 0.0140 -1.4993
PVRL1 0.3178 29.09 4.940 0.0000 0.0002 4.8709
PRDX6 -0.3176 33.36 -4.707 0.0000 0.0003 3.8982
E9PQM0 -0.3175 24.01 -3.894 0.0001 0.0025 0.8116
A0A087WY99 -0.3174 25.21 -3.944 0.0001 0.0022 0.9863
SCN4B -0.3165 25.04 -3.130 0.0020 0.0155 -1.6311
H3BN15 -0.3144 25.19 -3.267 0.0013 0.0116 -1.2281
A6PVC3 -0.3133 29.57 -3.702 0.0003 0.0040 0.1536
H2AZ -0.3133 29.25 -3.246 0.0014 0.0122 -1.2908
E9PL00 0.3133 29.26 4.751 0.0000 0.0003 4.0802
FUBP3 -0.3130 24.66 -4.854 0.0000 0.0002 4.5070
LY6H 0.3128 30.85 3.736 0.0002 0.0036 0.2659
G3XAK4 -0.3084 28.57 -5.019 0.0000 0.0002 5.2086
PLEC -0.3056 25.82 -3.745 0.0002 0.0035 0.2975
UBCP1 -0.3053 24.84 -3.026 0.0028 0.0193 -1.9265
RASL1 0.3049 27.57 2.690 0.0078 0.0395 -2.8215
SAMH1 -0.3047 26.71 -3.033 0.0027 0.0190 -1.9083
J3QRJ3 0.3046 33.29 5.745 0.0000 0.0000 8.4984
CCKN 0.3028 29.58 4.271 0.0000 0.0011 2.1824
PLXA1 0.3026 29.67 3.347 0.0010 0.0097 -0.9859
ARHG6 -0.3023 25.12 -5.735 0.0000 0.0000 8.4503
G3V368 0.3016 26.07 2.766 0.0062 0.0339 -2.6274

2.4.7 Significantly DE proteins: AD vs PSP

Build a contrast AD vs PSP

AvsP <- makeContrasts(diagnosisAD - diagnosisPSP, levels=design) 
fit_AvsP <-contrasts.fit(fit, AvsP)

bf_AvsP <- eBayes(fit_AvsP)

bf_AvsP %>% decideTests %>% summary 
##        diagnosisAD - diagnosisPSP
## Down                         1240
## NotSig                       1882
## Up                            823

Thus, 1240 proteins are significantly downregulated and 823 upregulated in PSP versus Control samples (log2 FC cutoff is not applied here).

AvsP_top <- topTable(bf_AvsP, adjust="BH", sort.by = "logFC", number = 1000, p.value = pv_cut, lfc = fc_cut)

write.csv(AvsP_top, "./results_prot/prot_ad_vs_psp.csv")

knitr::kable(AvsP_top,
             caption = "Top DE proteins: AD vs PSP") 
Table 2.3: Top DE proteins: AD vs PSP
logFC AveExpr t P.Value adj.P.Val B
H0YD17 2.1245 27.56 11.409 0.0000 0.0000 42.8256
CO1A1 1.9168 27.50 6.916 0.0000 0.0000 14.4058
K7ES72 1.8781 26.02 8.689 0.0000 0.0000 24.9031
C9JNG9 1.8114 28.78 6.393 0.0000 0.0000 11.5777
F8VNV6 1.5912 27.33 5.489 0.0000 0.0000 7.0452
H7C545 1.4217 29.01 14.777 0.0000 0.0000 66.1769
K7EKD1 1.3815 28.46 12.204 0.0000 0.0000 48.2846
PRELP 1.2962 26.59 6.093 0.0000 0.0000 10.0209
F5H5D6 1.2512 25.49 6.483 0.0000 0.0000 12.0564
H0YL34 1.1763 28.03 7.344 0.0000 0.0000 16.8207
Q05BJ3 -1.1738 30.38 -8.560 0.0000 0.0000 24.1038
CO6A1 1.1617 27.86 7.050 0.0000 0.0000 15.1513
C9J6J8 1.1422 26.19 7.937 0.0000 0.0000 20.2993
GMPR1 1.0703 25.32 5.960 0.0000 0.0000 9.3496
ARAID 0.9439 37.13 13.075 0.0000 0.0000 54.3219
C9IZ15 -0.9310 27.43 -6.042 0.0000 0.0000 9.7635
B1PS43 0.9295 28.65 3.907 0.0001 0.0005 0.4095
B0YJC5 0.9181 34.21 9.578 0.0000 0.0000 30.5775
ZO2 0.8855 27.01 8.180 0.0000 0.0000 21.7680
K7EP04 0.8564 27.43 8.307 0.0000 0.0000 22.5427
TAU.1 0.8518 30.91 9.200 0.0000 0.0000 28.1417
PGAM2 0.8487 27.85 7.167 0.0000 0.0000 15.8141
PLEC 0.8425 25.82 14.077 0.0000 0.0000 61.3047
Q5TBF5 0.8360 25.90 4.299 0.0000 0.0001 1.8865
CO4A 0.8018 29.15 7.312 0.0000 0.0000 16.6395
H0Y7G9 0.8017 26.57 8.219 0.0000 0.0000 22.0027
F5GXC1 0.8016 26.25 8.009 0.0000 0.0000 20.7307
J3QR93 0.7748 28.06 6.252 0.0000 0.0000 10.8420
IP3KB 0.7747 25.15 9.169 0.0000 0.0000 27.9433
A0A087WWP1 0.7688 28.23 6.257 0.0000 0.0000 10.8675
Q5TBU2 0.7560 29.49 7.437 0.0000 0.0000 17.3578
C9JFK9 0.7353 28.17 8.809 0.0000 0.0000 25.6592
Q5T3N0 0.7260 29.34 8.847 0.0000 0.0000 25.8974
H0YLE2 0.7234 30.81 8.945 0.0000 0.0000 26.5163
NPTX2 -0.7152 25.92 -7.314 0.0000 0.0000 16.6519
C9J3N8 0.7096 31.71 10.120 0.0000 0.0000 34.1377
SYT2 0.7024 28.24 4.908 0.0000 0.0000 4.4046
A6NMN0 0.6998 29.33 8.358 0.0000 0.0000 22.8517
HCN4 -0.6965 29.10 -3.555 0.0005 0.0015 -0.8093
PADI2 0.6962 31.34 11.374 0.0000 0.0000 42.5812
CXA1 0.6731 31.91 9.916 0.0000 0.0000 32.7887
H0YG30 0.6717 27.10 10.882 0.0000 0.0000 39.2411
AHNK 0.6696 31.67 10.823 0.0000 0.0000 38.8413
H7C3S1 0.6617 27.73 3.869 0.0001 0.0005 0.2753
CLIC1 0.6557 26.64 6.811 0.0000 0.0000 13.8297
PROD 0.6507 27.04 5.189 0.0000 0.0000 5.6520
A0A096LP69 0.6400 27.33 6.287 0.0000 0.0000 11.0236
KGP2 0.6400 24.85 6.084 0.0000 0.0000 9.9779
H0Y796 0.6349 28.24 5.163 0.0000 0.0000 5.5386
H7C2E7 0.6235 30.15 7.581 0.0000 0.0000 18.1916
S10AA 0.6046 26.09 6.815 0.0000 0.0000 13.8500
AB17A 0.5850 23.66 7.347 0.0000 0.0000 16.8405
E9PNM5 -0.5849 27.86 -9.333 0.0000 0.0000 28.9934
RASL1 -0.5812 27.57 -6.992 0.0000 0.0000 14.8263
A0A087WWT2 -0.5790 26.11 -7.829 0.0000 0.0000 19.6531
H7C0M5 0.5778 26.23 3.613 0.0004 0.0012 -0.6167
J3KRM4 0.5775 33.04 10.254 0.0000 0.0000 35.0244
SNTB1 0.5767 26.55 6.847 0.0000 0.0000 14.0267
H7C0X8 0.5734 28.31 8.122 0.0000 0.0000 21.4146
S10A8 0.5671 23.77 5.853 0.0000 0.0000 8.8123
R4GNC7 0.5595 27.05 5.104 0.0000 0.0000 5.2726
K7ESQ2 0.5507 27.82 5.300 0.0000 0.0000 6.1609
F6THM6 -0.5495 31.60 -6.907 0.0000 0.0000 14.3573
F2Z2Z8 0.5449 29.65 10.911 0.0000 0.0000 39.4328
B5MCB5 0.5446 27.33 3.724 0.0003 0.0009 -0.2367
C9JZU8 -0.5322 26.00 -5.958 0.0000 0.0000 9.3370
PLCD1 0.5311 29.49 10.629 0.0000 0.0000 37.5318
Q5TBN5 0.5214 26.03 3.982 0.0001 0.0004 0.6828
H7BZ97 0.5164 24.96 7.442 0.0000 0.0000 17.3837
C2C4C -0.5138 26.59 -7.233 0.0000 0.0000 16.1869
R4GN98 0.5133 29.10 3.819 0.0002 0.0006 0.0956
F8VRJ1 -0.5093 31.25 -8.136 0.0000 0.0000 21.4986
HMGN2 0.5082 29.70 5.548 0.0000 0.0000 7.3282
PRDX6 0.5049 33.36 10.205 0.0000 0.0000 34.6998
HECAM 0.5021 30.99 10.208 0.0000 0.0000 34.7244
H0YFB9 -0.5009 29.19 -8.043 0.0000 0.0000 20.9380
Q5T3J1 0.4997 31.31 4.834 0.0000 0.0000 4.0851
C9J6E1 0.4965 24.32 7.553 0.0000 0.0000 18.0280
K7ER39 0.4917 28.07 7.575 0.0000 0.0000 18.1586
E7EUI6 0.4896 26.16 8.162 0.0000 0.0000 21.6571
C9JHZ9 -0.4895 28.79 -5.711 0.0000 0.0000 8.1144
BATF3 -0.4894 27.10 -7.188 0.0000 0.0000 15.9329
D6RCN3 0.4885 32.07 7.964 0.0000 0.0000 20.4590
A0A087X134 0.4881 32.85 10.838 0.0000 0.0000 38.9419
H3BV46 -0.4872 24.93 -6.507 0.0000 0.0000 12.1811
F2Z2Q2 0.4861 26.40 4.578 0.0000 0.0000 3.0053
CYTB 0.4796 27.86 7.152 0.0000 0.0000 15.7255
H0Y933 0.4790 24.95 11.312 0.0000 0.0000 42.1605
A6PVC3 0.4769 29.57 7.684 0.0000 0.0000 18.8003
FABP7 0.4750 26.91 5.423 0.0000 0.0000 6.7362
PLEC.1 0.4748 35.30 10.998 0.0000 0.0000 40.0289
F5H1F2 0.4744 27.03 8.505 0.0000 0.0000 23.7568
H3BP73 0.4734 27.71 6.335 0.0000 0.0000 11.2742
H7C1X2 0.4732 25.32 4.183 0.0000 0.0002 1.4359
E9PJ32 0.4721 28.42 3.353 0.0010 0.0028 -1.4639
HPCA -0.4704 28.92 -9.256 0.0000 0.0000 28.5001
GPC5B 0.4694 27.45 4.531 0.0000 0.0001 2.8128
HNMT 0.4688 26.74 5.216 0.0000 0.0000 5.7771
H7C2Q0 0.4678 26.52 7.380 0.0000 0.0000 17.0282
GBG12 0.4668 29.67 9.080 0.0000 0.0000 27.3713
J3KT01 0.4640 31.12 3.871 0.0001 0.0005 0.2801
CYBR1 0.4640 26.78 6.111 0.0000 0.0000 10.1135
J3QKL8 -0.4617 27.01 -5.813 0.0000 0.0000 8.6142
NFH 0.4582 33.03 5.304 0.0000 0.0000 6.1834
E9PKC9 0.4567 25.90 6.775 0.0000 0.0000 13.6287
S10AB 0.4541 27.99 3.914 0.0001 0.0005 0.4356
RTN4 -0.4531 26.83 -9.029 0.0000 0.0000 27.0486
SPTN1 0.4522 27.80 7.539 0.0000 0.0000 17.9465
E9PGA7 0.4518 32.54 9.233 0.0000 0.0000 28.3498
Q5T8Q9 0.4492 27.55 2.890 0.0043 0.0103 -2.8345
DLGP2 -0.4443 25.64 -4.918 0.0000 0.0000 4.4478
E7ERP7 0.4363 30.76 6.357 0.0000 0.0000 11.3905
B8ZZJ2 0.4356 27.05 2.981 0.0032 0.0081 -2.5801
F8WCH0 0.4343 33.37 3.971 0.0001 0.0004 0.6429
A0A087WX08 0.4333 26.05 8.210 0.0000 0.0000 21.9511
K7ELP4 0.4330 25.27 6.358 0.0000 0.0000 11.3920
B0QY64 0.4320 24.98 4.371 0.0000 0.0001 2.1698
D6RAU9 -0.4305 26.56 -6.603 0.0000 0.0000 12.6960
MARF1 -0.4301 25.32 -9.212 0.0000 0.0000 28.2138
D6R9P2 0.4254 28.64 7.898 0.0000 0.0000 20.0643
ANXA4 0.4244 28.10 9.250 0.0000 0.0000 28.4587
C9J1K7 0.4207 26.64 5.990 0.0000 0.0000 9.5002
GHC2 0.4170 27.14 6.587 0.0000 0.0000 12.6103
H0Y4F5 0.4161 30.98 5.827 0.0000 0.0000 8.6876
TAU.3 0.4160 26.73 7.409 0.0000 0.0000 17.1950
K7ELJ5 -0.4147 25.64 -7.792 0.0000 0.0000 19.4356
D6RAF1 0.4123 24.56 5.348 0.0000 0.0000 6.3844
H0YFX9 0.4110 30.41 7.944 0.0000 0.0000 20.3411
ACBG2 0.4105 26.51 5.597 0.0000 0.0000 7.5633
A8MRB1 0.4104 32.73 6.924 0.0000 0.0000 14.4499
H3BQC7 -0.4098 29.51 -5.987 0.0000 0.0000 9.4856
NPTXR -0.4079 28.76 -5.045 0.0000 0.0000 5.0047
PDLI5 0.4074 27.85 7.997 0.0000 0.0000 20.6604
CATZ 0.4073 25.34 7.148 0.0000 0.0000 15.7039
K7EP08 0.4072 26.26 5.260 0.0000 0.0000 5.9781
AL1L2 0.4054 31.75 9.570 0.0000 0.0000 30.5283
H7C5L4 0.4045 26.19 5.624 0.0000 0.0000 7.6922
A0A0C4DGC5 0.4043 31.34 8.518 0.0000 0.0000 23.8414
CSPG2 0.4025 33.30 7.218 0.0000 0.0000 16.1037
ASAH1 0.4018 28.57 6.830 0.0000 0.0000 13.9341
MERL 0.4017 32.14 9.287 0.0000 0.0000 28.6966
C9J7C7 0.3995 27.51 8.019 0.0000 0.0000 20.7914
MAP4 0.3993 30.77 9.791 0.0000 0.0000 31.9677
C9J7D8 0.3990 26.24 6.509 0.0000 0.0000 12.1926
A0A0C4DFY8 -0.3971 27.36 -3.759 0.0002 0.0008 -0.1144
A0A0C4DGZ9 0.3966 31.03 9.404 0.0000 0.0000 29.4498
Q5SZU1 0.3962 31.53 8.229 0.0000 0.0000 22.0640
U3KQQ1 0.3936 33.86 7.686 0.0000 0.0000 18.8097
ACTN2 -0.3925 26.14 -7.831 0.0000 0.0000 19.6681
B4DUT8 0.3911 28.46 6.595 0.0000 0.0000 12.6560
A0A087WWY6 0.3896 27.43 5.795 0.0000 0.0000 8.5287
G3V3R6 0.3893 26.66 6.425 0.0000 0.0000 11.7472
H0YK70 0.3891 25.81 4.150 0.0000 0.0002 1.3099
F5H617 -0.3888 29.15 -5.909 0.0000 0.0000 9.0916
E9PFT6 -0.3878 31.34 -4.428 0.0000 0.0001 2.3974
D6RJA3 -0.3876 25.57 -6.038 0.0000 0.0000 9.7421
H2A1D 0.3853 32.90 6.513 0.0000 0.0000 12.2134
D6RGJ1 0.3847 26.41 3.917 0.0001 0.0005 0.4474
H0Y592 0.3836 26.16 5.716 0.0000 0.0000 8.1374
H0Y972 -0.3831 26.09 -6.208 0.0000 0.0000 10.6121
A0A087X1B9 0.3825 25.38 4.589 0.0000 0.0000 3.0526
PTRF 0.3817 28.48 6.976 0.0000 0.0000 14.7425
F5GXS0 0.3813 25.97 4.437 0.0000 0.0001 2.4342
C9IZA0 0.3812 26.84 6.740 0.0000 0.0000 13.4394
C9J8K5 0.3806 26.70 7.699 0.0000 0.0000 18.8893
Q5RI18 0.3796 25.43 6.292 0.0000 0.0000 11.0509
ACBD7 0.3795 29.79 6.806 0.0000 0.0000 13.8005
A0A096LPE1 0.3794 29.79 8.616 0.0000 0.0000 24.4499
F8WE61 0.3790 28.00 5.274 0.0000 0.0000 6.0438
H2BFS 0.3789 30.18 5.306 0.0000 0.0000 6.1913
DOCK3 -0.3785 26.78 -6.364 0.0000 0.0000 11.4244
E5RIX3 -0.3779 29.88 -6.491 0.0000 0.0000 12.0954
PARVA 0.3775 26.81 7.987 0.0000 0.0000 20.5974
H0Y7N1 0.3766 28.87 9.623 0.0000 0.0000 30.8693
C1QC 0.3764 26.36 3.886 0.0001 0.0005 0.3361
RGS14 -0.3746 24.91 -6.254 0.0000 0.0000 10.8500
J3QQX8 0.3725 26.19 5.218 0.0000 0.0000 5.7871
TPD53 0.3719 26.85 6.255 0.0000 0.0000 10.8546
D6RC12 -0.3699 26.10 -5.905 0.0000 0.0000 9.0719
PLXB1 0.3698 27.17 5.015 0.0000 0.0000 4.8724
D6R9P1 0.3696 27.76 3.082 0.0023 0.0061 -2.2890
CES1P 0.3690 33.86 3.268 0.0013 0.0036 -1.7307
ANO1 -0.3684 28.94 -5.801 0.0000 0.0000 8.5558
H0YAT3 0.3681 24.79 5.890 0.0000 0.0000 8.9968
FUBP3 0.3673 24.66 7.768 0.0000 0.0000 19.2921
E5RFY9 0.3672 26.13 6.100 0.0000 0.0000 10.0560
PIR 0.3669 25.55 4.479 0.0000 0.0001 2.6004
LAMB4 0.3662 27.81 5.530 0.0000 0.0000 7.2426
Q5JT49 0.3647 27.14 4.865 0.0000 0.0000 4.2194
H7C457 0.3630 25.14 5.429 0.0000 0.0000 6.7627
M0QZP8 -0.3624 27.60 -8.128 0.0000 0.0000 21.4517
PLXA2 -0.3616 25.44 -4.117 0.0001 0.0002 1.1849
D6RFY4 -0.3608 27.46 -6.983 0.0000 0.0000 14.7783
LY6H -0.3599 30.85 -5.862 0.0000 0.0000 8.8581
MACD1 0.3597 27.82 7.479 0.0000 0.0000 17.6023
D6REM0 -0.3588 27.01 -5.448 0.0000 0.0000 6.8543
NDUV3.1 -0.3585 26.80 -3.590 0.0004 0.0013 -0.6934
TBB2B 0.3576 28.44 3.204 0.0016 0.0043 -1.9264
A6PVK2 -0.3572 24.55 -6.813 0.0000 0.0000 13.8406
E9PBF6 0.3563 27.65 3.593 0.0004 0.0013 -0.6845
H0Y7G2 -0.3550 29.46 -7.516 0.0000 0.0000 17.8121
SCN4B 0.3546 25.04 4.783 0.0000 0.0000 3.8643
E5RG36CON__P17697 0.3546 31.65 7.582 0.0000 0.0000 18.2008
SHAN2 -0.3543 26.86 -5.463 0.0000 0.0000 6.9242
C9JUP6 0.3541 25.69 4.373 0.0000 0.0001 2.1776
VAMP1 0.3536 29.14 6.138 0.0000 0.0000 10.2546
I3L0T3 0.3527 27.55 3.553 0.0005 0.0015 -0.8168
E9PQS8 -0.3526 26.55 -7.308 0.0000 0.0000 16.6143
H0YIZ1 0.3522 25.96 5.104 0.0000 0.0000 5.2714
RBM3 0.3500 25.14 3.832 0.0002 0.0006 0.1436
ARHGP -0.3490 25.97 -4.081 0.0001 0.0003 1.0502
TPPP3 0.3478 30.63 6.098 0.0000 0.0000 10.0494
A0A087WW77 0.3473 29.04 8.957 0.0000 0.0000 26.5909
VA0D2 -0.3456 28.44 -4.859 0.0000 0.0000 4.1936
1B37 0.3452 25.54 4.327 0.0000 0.0001 1.9935
C9JWY7 0.3445 29.29 9.140 0.0000 0.0000 27.7582
X6R242 0.3431 27.18 3.323 0.0011 0.0030 -1.5592
ADDG 0.3429 31.65 11.410 0.0000 0.0000 42.8286
H12 0.3428 27.62 3.746 0.0002 0.0008 -0.1591
CAV3 0.3425 26.71 4.920 0.0000 0.0000 4.4580
MRP 0.3388 28.53 7.517 0.0000 0.0000 17.8192
CKLF5 0.3387 26.50 4.356 0.0000 0.0001 2.1104
H3BSP9 0.3380 31.04 5.224 0.0000 0.0000 5.8157
H0YFB7 0.3376 26.90 5.498 0.0000 0.0000 7.0897
CLIC5 0.3370 29.44 5.124 0.0000 0.0000 5.3619
SERC 0.3360 31.04 8.639 0.0000 0.0000 24.5936
C9JDT0 -0.3359 30.46 -7.025 0.0000 0.0000 15.0133
GBG5 0.3357 29.22 4.994 0.0000 0.0000 4.7811
H3BMD4 -0.3329 25.61 -4.842 0.0000 0.0000 4.1176
E9PNP4 0.3328 31.82 7.710 0.0000 0.0000 18.9499
E5RHA9 -0.3315 28.16 -7.380 0.0000 0.0000 17.0277
F8VU44 -0.3308 25.84 -5.916 0.0000 0.0000 9.1293
F8VXJ5 0.3303 28.74 2.730 0.0069 0.0156 -3.2649
DLGP3 -0.3296 27.60 -5.889 0.0000 0.0000 8.9944
B5MC71 0.3291 26.82 3.815 0.0002 0.0006 0.0822
Q5T0I0 0.3286 33.42 7.506 0.0000 0.0000 17.7562
RTN1 -0.3283 29.60 -7.909 0.0000 0.0000 20.1336
HUG1 0.3281 28.59 8.043 0.0000 0.0000 20.9391
H7C144 -0.3279 26.84 -7.452 0.0000 0.0000 17.4417
J3KS57 0.3271 26.03 4.951 0.0000 0.0000 4.5906
H0Y3R8 0.3269 28.55 7.546 0.0000 0.0000 17.9879
SYNPO -0.3268 30.88 -3.829 0.0002 0.0006 0.1317
VAMP3 0.3268 25.34 6.671 0.0000 0.0000 13.0651
H3BT58 0.3263 30.10 8.707 0.0000 0.0000 25.0150
H0Y921 0.3252 28.36 7.668 0.0000 0.0000 18.7063
E9PS65 0.3250 26.09 3.075 0.0024 0.0062 -2.3085
A2A3C4 -0.3245 24.65 -5.750 0.0000 0.0000 8.3037
GBG4 -0.3243 27.05 -4.793 0.0000 0.0000 3.9090
S4R3U1 0.3239 26.14 5.248 0.0000 0.0000 5.9257
H7C1E7 0.3221 29.92 3.119 0.0021 0.0055 -2.1792
TRI25 0.3221 24.71 3.740 0.0002 0.0008 -0.1795
E9PJM7 0.3219 26.10 4.803 0.0000 0.0000 3.9494
B1AQP8 0.3207 29.11 6.773 0.0000 0.0000 13.6174
J3KRN1 0.3196 27.23 6.641 0.0000 0.0000 12.9015
A0A087WUT1 0.3193 30.10 5.415 0.0000 0.0000 6.6981
OPA3 -0.3192 26.38 -4.316 0.0000 0.0001 1.9531
GSTT1 0.3184 27.87 2.396 0.0175 0.0350 -4.0888
H0YLF3 0.3178 29.08 4.408 0.0000 0.0001 2.3178
H1T 0.3172 31.84 5.554 0.0000 0.0000 7.3562
H3BPG5 0.3170 24.99 5.389 0.0000 0.0000 6.5773
CTGE8 -0.3163 27.28 -6.221 0.0000 0.0000 10.6824
K7ENJ7 0.3160 28.89 8.133 0.0000 0.0000 21.4773
EHD2 0.3158 26.90 4.442 0.0000 0.0001 2.4529
H3BRQ4 0.3143 26.82 3.809 0.0002 0.0007 0.0610
C1QL2 -0.3136 26.53 -6.512 0.0000 0.0000 12.2081
GRIK2 -0.3124 27.08 -4.819 0.0000 0.0000 4.0200
I3L1K1 -0.3124 25.74 -5.096 0.0000 0.0000 5.2331
H7C5B5 -0.3119 24.89 -4.730 0.0000 0.0000 3.6406
AOFB 0.3111 32.40 6.271 0.0000 0.0000 10.9414
A0A087WW91 -0.3110 25.53 -2.694 0.0077 0.0171 -3.3581
TLN1 0.3084 29.94 8.015 0.0000 0.0000 20.7661
H3BQZ3 0.3084 30.16 8.184 0.0000 0.0000 21.7917
J3QSF4 -0.3077 26.28 -6.427 0.0000 0.0000 11.7569
ADCK3 -0.3068 26.75 -4.775 0.0000 0.0000 3.8320
CAPZB 0.3067 26.92 5.750 0.0000 0.0000 8.3043
SNX32 -0.3067 23.79 -4.210 0.0000 0.0002 1.5394
J3QRJ3 -0.3065 33.29 -7.885 0.0000 0.0000 19.9883
TSPOA 0.3051 25.97 6.795 0.0000 0.0000 13.7385
1A02 0.3051 27.12 3.267 0.0013 0.0036 -1.7333
E9PJJ8 0.3043 27.59 6.115 0.0000 0.0000 10.1371
ADXL -0.3041 30.01 -7.205 0.0000 0.0000 16.0268
H3BNK3 -0.3035 28.22 -7.938 0.0000 0.0000 20.3055
H90B4 -0.3019 29.25 -5.312 0.0000 0.0000 6.2200
SAMH1 0.3012 26.71 4.087 0.0001 0.0003 1.0748
F8W8D9 -0.3009 27.84 -4.485 0.0000 0.0001 2.6276
E5RG14 -0.3005 29.08 -4.291 0.0000 0.0001 1.8554
LRC73 -0.3003 25.27 -5.100 0.0000 0.0000 5.2510
Q6IPE9 -0.3002 27.61 -5.657 0.0000 0.0000 7.8541

2.4.8 Table with log-FC and Adjusted p-values

dat_fc_pv <- bind_cols(ProteinIDs = rownames(data_imp),
                       fc_control_ad = topTable(bf_AvsC, sort = "none", n = Inf)$logFC,
                       fc_control_psp = topTable(bf_PvsC, sort = "none", n = Inf)$logFC,
                       fc_ad_psp = topTable(bf_AvsP, sort = "none", n = Inf)$logFC,
                       ad_control_pv = topTable(bf_AvsC, sort = "none", n = Inf)$adj.P.Val,
                       psp_control_pv = topTable(bf_PvsC, sort = "none", n = Inf)$adj.P.Val,
                       ad_psp_pv = topTable(bf_AvsP, sort = "none", n = Inf)$adj.P.Val,
                       PIDshort = rownames(data_norm)) 

2.4.9 List of up and downregulated DE proteins

The results are collected in an Excel file, the name of which includes the criteria (adjusted p-values ​​and log2FC). If an Excel file with the same name and page exists, then the data is not overwritten and an error message appears. Therefore, before running the code again with the same parameters (the same adjusted p-values+FC), you need to delete this file.

AD_C_UP <- dat_fc_pv %>%
  filter(ad_control_pv <= pv_cut) %>%
  filter(fc_control_ad >= fc_cut) %>%
  arrange(-abs(fc_control_ad)) %>%
  dplyr::select(PIDshort, ad_control_pv, fc_control_ad)

AD_C_DOWN <- dat_fc_pv %>%
  filter(ad_control_pv <= pv_cut) %>%
  filter(fc_control_ad <= -fc_cut) %>%
  arrange(-abs(fc_control_ad)) %>%
  dplyr::select(PIDshort, ad_control_pv, fc_control_ad)

PSP_C_UP <- dat_fc_pv %>%
  filter(psp_control_pv <= pv_cut) %>%
  filter(fc_control_psp >= fc_cut) %>%
  arrange(-abs(fc_control_psp)) %>%
  dplyr::select(PIDshort, psp_control_pv, fc_control_psp)

PSP_C_DOWN <- dat_fc_pv %>%
  filter(psp_control_pv <= pv_cut) %>%
  filter(fc_control_psp <= -fc_cut) %>%
  arrange(-abs(fc_control_psp)) %>%
  dplyr::select(PIDshort, psp_control_pv, fc_control_psp)

AD_PSP_UP <- dat_fc_pv %>%
  filter(ad_psp_pv <= pv_cut) %>%
  filter(fc_ad_psp >= fc_cut) %>%
  arrange(-abs(fc_ad_psp)) %>%
  dplyr::select(PIDshort, ad_psp_pv, fc_ad_psp)

AD_PSP_DOWN <- dat_fc_pv %>%
  filter(ad_psp_pv <= pv_cut) %>%
  filter(fc_ad_psp <= -fc_cut) %>%
  arrange(-abs(fc_ad_psp)) %>%
  dplyr::select(PIDshort, ad_psp_pv, fc_ad_psp)

require(xlsx)

write.xlsx(AD_C_UP,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "AD vs Control UP")

write.xlsx(AD_C_DOWN,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "AD vs Control DOWN")

write.xlsx(PSP_C_UP,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "PSP vs Control UP")

write.xlsx(PSP_C_DOWN,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "PSP vs Control DOWN")

write.xlsx(AD_PSP_UP,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "AD vs PSP UP")

write.xlsx(AD_PSP_DOWN,
           file = paste0("./results_prot/DE_genes_p-value_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           append = TRUE,
           sheetName = "AD vs PSP DOWN") 

2.4.10 Clustering and heatmaps

2.4.10.1 Clustering AD-Control

Prepare an object for clustering

data_log2 <- data_norm %>% log2

d <- data_log2

## Set clear column names
colnames(d) <- c(rep("Control", 31), rep("AD", 84), rep("PSP", 85)) %>% make.unique

## add columns with short protein IDs, log2 folds,  adjusted p-values
data_all <- data.frame(d, dat_fc_pv)

rm(d) 

Filter the data for AD vs Control according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.

dat_filt_ad <- data_all %>% filter(abs(fc_control_ad) >= fc_cut &
                                     ad_control_pv <= pv_cut) %>%
  arrange(-abs(fc_control_ad)) %>%
  ## slice_head(n=25) %>%
  dplyr::select(PIDshort, (starts_with("AD.") | starts_with("Control")) ) 

Transform to matrix

## data.matrix() leaves numeric data as is.
ad_matrix <- data.matrix(dat_filt_ad)

rownames(ad_matrix) <- dat_filt_ad$PIDshort

ad_matrix <- ad_matrix[,-1] 
ad_scaled <- ad_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: AD & Control

ad_dist <- ad_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: AD & Control

ad_dist_prot <- ad_scaled %>% 
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

ad_hclust <- hclust(ad_dist, method = "ward.D2", members = NULL)
ad_hclust_prot <- hclust(ad_dist_prot, method = "ward.D2", members = NULL)


png("./figs_prot/hclust_ad.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_hclust %>% plot(cex = 0.4, main = "Conditions (Number of samples: AD = 84, Control = 31)")
ad_hclust_prot %>% plot(cex = 0.6, main = "Proteins (Number of samples: AD = 84, Control = 31)")
dev.off() 
## png 
##   2

2.4.10.2 Heatmap AD-Control

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

png("./figs_prot/heatmap_ad.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
                    Colv=as.dendrogram(ad_hclust),     # Experiments clusters in cols
                    Rowv=as.dendrogram(ad_hclust_prot),     # Protein clusters in rows
                    revC=TRUE,                  # Flip plot to match pheatmap
                    density.info="histogram",   # Plot histogram of data and colour key
                    trace="none",               # Turn of trace lines from heat map
                    col = my_palette,           # Use my colour scheme
                    cexRow=0.3,cexCol=0.2,      # Amend row and column label fonts
                    xlab = paste0("Number of samples: AD = ", length(ad_cols), ", Control = ", length(control_cols))
                    )
dev.off() 
## png 
##   2

2.4.10.3 Clustering PSP-Control

Filter the data for PSP vs Control according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.

dat_filt_psp <- data_all %>% filter(abs(fc_control_psp) >= fc_cut &
                                     psp_control_pv <= pv_cut) %>%
  arrange(-abs(fc_control_psp)) %>%
  ## slice_head(n=25) %>%
  dplyr::select(PIDshort, (starts_with("PSP.") | starts_with("Control")) ) 

Transform to matrix

## data.matrix() leaves numeric data as is.
psp_matrix <- data.matrix(dat_filt_psp)

rownames(psp_matrix) <- dat_filt_psp$PIDshort

psp_matrix <- psp_matrix[,-1] 
psp_scaled <- psp_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: AD & Control

psp_dist <- psp_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: AD & Control

psp_dist_prot <- psp_scaled %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

psp_hclust <- hclust(psp_dist, method = "ward.D2", members = NULL)
psp_hclust_prot <- hclust(psp_dist_prot, method = "ward.D2", members = NULL)


png("./figs_prot/hclust_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
psp_hclust %>% plot(cex = 0.4, main = "Conditions (Number of samples: PSP = 85, Control = 31)")
psp_hclust_prot %>% plot(cex = 0.6, main = "Proteins (Number of samples: PSP = 85, Control = 31)")
dev.off() 
## png 
##   2

2.4.10.4 Heatmap PSP-Control

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

# Plot heatmap with heatmap.2
png("./figs_prot/heatmap_psp.png", width = 2000, height = 2000, res = 300)
par(cex.main=0.75) # Shrink title fonts on plot
psp_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
          Colv=as.dendrogram(psp_hclust),     # Experiments clusters in cols
          Rowv=as.dendrogram(psp_hclust_prot),     # Protein clusters in rows
          revC=TRUE,                  # Flip plot to match pheatmap
          density.info="histogram",   # Plot histogram of data and colour key
          trace="none",               # Turn of trace lines from heat map
          col = my_palette,           # Use my colour scheme
          cexRow=0.3, cexCol=0.3,     # Amend row and column label fonts
          xlab = paste0("Number of samples: PSP = ", length(psp_cols), ", Control = ", length(control_cols))
          )
dev.off() 
## png 
##   2

2.4.10.5 Clustering AD-PSP

Filter the data for AD vs PSP according to a threshold of significance: adjusted p-values 0.05 and log2 fold change to 0.3.

dat_filt_adpsp <- data_all %>% filter(abs(fc_ad_psp) >= fc_cut &
                                     ad_psp_pv <= pv_cut) %>%
  arrange(-abs(fc_ad_psp)) %>%
  ## slice_head(n=25) %>%
  dplyr::select(PIDshort, (starts_with("AD.") | starts_with("PSP.")) ) 

Transform to matrix

## data.matrix() leaves numeric data as is.
adpsp_matrix <- data.matrix(dat_filt_adpsp)

rownames(adpsp_matrix) <- dat_filt_adpsp$PIDshort

adpsp_matrix <- adpsp_matrix[,-1] 
adpsp_scaled <- adpsp_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: AD & Control

adpsp_dist <- adpsp_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: AD & Control

adpsp_dist_prot <- adpsp_scaled %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

adpsp_hclust <- hclust(adpsp_dist, method = "ward.D2", members = NULL)
adpsp_hclust_prot <- hclust(adpsp_dist_prot, method = "ward.D2", members = NULL)


png("./figs_prot/hclust_ad_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
adpsp_hclust %>% plot(cex = 0.2, main = "Conditions (Number of samples: AD = 84, PSP = 85)")
adpsp_hclust_prot %>% plot(cex = 0.2, main = "Proteins (Number of samples: AD = 84, PSP = 85)")
dev.off() 
## png 
##   2

2.4.10.6 Heatmap AD-PSP

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

# Plot heatmap with heatmap.2
png("./figs_prot/heatmap_ad_psp.png", width = 2000, height = 2000, res = 300)
par(cex.main=0.75) # Shrink title fonts on plot
adpsp_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
          Colv=as.dendrogram(adpsp_hclust),     # Experiments clusters in cols
          Rowv=as.dendrogram(adpsp_hclust_prot),     # Protein clusters in rows
          revC=TRUE,                  # Flip plot to match pheatmap
          density.info="histogram",   # Plot histogram of data and colour key
          trace="none",               # Turn of trace lines from heat map
          col = my_palette,           # Use my colour scheme
          cexRow=0.3, cexCol=0.3,     # Amend row and column label fonts
          xlab = paste0("Number of samples: AD = ", length(ad_cols), ", PSP = ", length(psp_cols))
          )
dev.off() 
## png 
##   2

2.4.11 Venn diagram for significantly DE proteins (adj. p-values<0.05, FC>0.3)

2.4.11.1 All proteins

venn_list <- list("AD vs Control" =  dat_fc_pv %>%
                    filter(ad_control_pv <= pv_cut) %>%
                    ## filter(ad_control_pv <= pv_cut & abs(fc_control_ad) > fc_cut) %>%
                    pull(PIDshort),
                  "PSP vs Control" = dat_fc_pv %>%
                    filter(psp_control_pv <= pv_cut) %>%
                    ## filter(psp_control_pv <= pv_cut & abs(fc_control_psp) > fc_cut) %>%
                    pull(PIDshort),
                  "AD vs PSP" = dat_fc_pv %>%
                    filter(ad_psp_pv <= pv_cut) %>%
                    ## filter(ad_psp_pv <= pv_cut & abs(fc_ad_psp) > fc_cut) %>%
                    pull(PIDshort)
                  )

# Prevent the output of a log file
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL
# Create a venn diagram object
prot_venn <- venn.diagram(venn_list,
                          NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = paste0("Pairwise significantly DE proteins (adj.p<",pv_cut,") identified in the experiment"),
                          main.pos = c(0.5, 1.05),
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent"), # Show both numbers and percent
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn))
dev.off() 
## png 
##   2

venn_list2 <- list(
  "AD-C +" =  dat_fc_pv %>%
    filter(ad_control_pv <= pv_cut & fc_control_ad >= 0) %>%
    pull(PIDshort),
  "AD-C -" =  dat_fc_pv %>%
    filter(ad_control_pv <= pv_cut & fc_control_ad < 0) %>%
    pull(PIDshort),
  "PSP-C +" =  dat_fc_pv %>%
    filter(psp_control_pv <= pv_cut & fc_control_psp >= 0) %>%
    pull(PIDshort),
  "PSP-C -" =  dat_fc_pv %>%
    filter(psp_control_pv <= pv_cut & fc_control_psp < 0) %>%
    pull(PIDshort),
  "AD-PSP +" =  dat_fc_pv %>%
    filter(ad_psp_pv <= pv_cut & fc_ad_psp >= 0) %>%
    pull(PIDshort),
  "AD-PSP -" =  dat_fc_pv %>%
    filter(ad_psp_pv <= pv_cut & fc_ad_psp < 0) %>%
    pull(PIDshort)
) 

2.4.11.2 Upregulated proteins

prot_venn_up <- venn.diagram(venn_list2[c(1,3,5)],NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = "Pairwise significantly upregulated proteins (adj.p<0.05) identified in the experiment",
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent"), # Show both numbers and percent
                          main.pos = c(0.5, 1.05)
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn_up.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn_up))
dev.off()
## png 
##   2

2.4.11.3 Downregulated proteins

prot_venn_down <- venn.diagram(venn_list2[c(2,4,6)],NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = "Pairwise significantly downregulated proteins (adj.p<0.05) identified in the experiment",
                          main.pos = c(0.5, 1.05),
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 84, PSP = 85, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent") # Show both numbers and percent
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_prot/venn_down.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = prot_venn_down))
dev.off()
## png 
##   2

2.5 Enchanced Volcano plots

EnhancedVolcano(dat_fc_pv,
                x = "fc_control_ad",
                y = "ad_control_pv",
                lab = dat_fc_pv$PIDshort,
                title = 'AD versus Control',
                subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
                pCutoff = pv_cut,
                FCcutoff = fc_cut,
                pointSize = 1.0,
                labSize = 3.0) +
  ggplot2::scale_y_continuous(limits = c(0, 7)) +
  ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("./figs_prot/ad_vs_control_volcano.png")
## Saving 7 x 5 in image
ad_vs_control_proteins <- dat_fc_pv %>%
  dplyr::select(PIDshort, fc = fc_control_ad, pv = ad_control_pv) %>%
  arrange(-abs(fc)) %>%
  filter(pv <= pv_cut & abs(fc) >= fc_cut)

require(xlsx)
write.xlsx(ad_vs_control_proteins,
           file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           sheetName = "AD vs Control", append=TRUE)

EnhancedVolcano(dat_fc_pv,
                x = "fc_control_psp",
                y = "psp_control_pv",
                lab = dat_fc_pv$PIDshort,
                title = 'PSP versus Control',
                subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
                pCutoff = pv_cut,
                FCcutoff = fc_cut,
                pointSize = 1.0,
                labSize = 3.0) +
  ggplot2::scale_y_continuous(limits = c(0, 6)) +
  ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("./figs_prot/psp_vs_control_volcano.png")
## Saving 7 x 5 in image
psp_vs_control_proteins <- dat_fc_pv %>%
  dplyr::select(PIDshort, fc = fc_control_psp, pv = psp_control_pv) %>%
  arrange(-abs(fc)) %>%
  filter(pv <= pv_cut & abs(fc) >= fc_cut)

require(xlsx)
write.xlsx(psp_vs_control_proteins,
           file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           sheetName = "PSP vs Control", append=TRUE)

EnhancedVolcano(dat_fc_pv,
                x = "fc_ad_psp",
                y = "ad_psp_pv",
                lab = dat_fc_pv$PIDshort,
                title = 'PSP versus AD',
                subtitle = "Number of samples: AD = 84, PSP = 85, Control = 31",
                pCutoff = pv_cut,
                FCcutoff = fc_cut,
                pointSize = 1.0,
                labSize = 3.0) +
  ggplot2::scale_y_continuous(limits = c(0, 20)) +
  ggplot2::scale_x_continuous(limits = c(-1, 1))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("./figs_prot/psp_vs_ad_volcano.png")
## Saving 7 x 5 in image
psp_vs_ad_proteins <- dat_fc_pv %>%
  dplyr::select(PIDshort, fc = fc_ad_psp, pv = ad_psp_pv) %>%
  arrange(-abs(fc)) %>%
  filter(pv <= pv_cut & abs(fc) >= fc_cut)

require(xlsx)
## write.xlsx(psp_vs_ad_proteins, "significantly_de_proteins.xlsx", sheetName = "PSP vs AD", append=TRUE)
write.xlsx(psp_vs_ad_proteins,
           file = paste0("./results_prot/significantly_de_proteins_pv_", pv_cut, "_log2FC_", fc_cut, ".xlsx"),
           sheetName = "PSP vs AD", append=TRUE) 

2.6 Boxplots

2.6.1 AD vs Control

## On the example of AD vs Control.

## At first i took the significantly DE genes for e.g. AD vs Control.
## Data with FC and p-values ​​has already been collected in dat_fc_pv. I
## had to select data from this table only for AD vs Control, since it
## was necessary to filter out significant genes for this particular
## variant.

ad_vs_control_names <- dat_fc_pv %>%
  ## Filtered out relevant proteins. In this case, with p-vales <= pv_cut and abs(log2FC) >= fc_cut
  filter(abs(fc_control_ad) >= fc_cut & ad_control_pv <= pv_cut) %>%
  ## Sorting the table
  arrange(-abs(fc_control_ad)) %>%
  slice_head(n=25) %>%
  ## The full form of protein names is needed to select them from the
  ## cra.log table.  The short form of protein names is needed for the
  ## graph.
  dplyr::select(ProteinIDs, PIDshort)

## Then, I filtered the required proteins from the cra.log dataframe,
## transposed the resulting table and changed its class from matrix to
## dataframe.
avc <- cra.log[ad_vs_control_names$ProteinIDs, ] %>% t %>% data.frame

## Renamed columns by protein names.
colnames(avc) <- ad_vs_control_names$PIDshort

## Created a "condition" column filled with NAs. It is needed to build boxplots.
avc$condition <- NA

## In the "condition" column, I replaced NAs with the variants names: Control, AD, PSP.
avc[control_cols, ]$condition <- "Control"
avc[ad_cols, ]$condition <- "AD"
avc[psp_cols, ]$condition <- "PSP"
avc %>% filter(condition != "PSP") %>%
  ## For ggplot, you need to convert the data from wide format to long format.
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  ## gglot uses the "condition" column to group boxplots: fill=condition in aes().
  ggplot(aes(protein, level, fill=condition)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("AD vs Control top 25 significant proteins (Number of samples: AD = 84, Control = 31)") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 322 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_ad_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 
## Warning: Removed 322 rows containing non-finite values (stat_boxplot).

2.6.2 PSP vs Control

psp_vs_control_names <- dat_fc_pv %>%
  filter(psp_control_pv <= pv_cut & abs(fc_control_psp) >= fc_cut) %>%
  arrange(-abs(fc_control_psp)) %>%
  slice_head(n=25) %>%
  dplyr::select(ProteinIDs, PIDshort)

pvc <- cra.log[psp_vs_control_names$ProteinIDs, ] %>% t %>% data.frame

colnames(pvc) <- psp_vs_control_names$PIDshort

pvc$condition <- NA

pvc[control_cols, ]$condition <- "Control"
pvc[ad_cols, ]$condition <- "AD"
pvc[psp_cols, ]$condition <- "PSP"

pvc %>% filter(condition != "AD") %>%
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  mutate(conditions = factor(condition, levels = c("PSP", "Control"))) %>%
  ggplot(aes(protein, level, fill=conditions)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("PSP vs Control top 25 significant proteins (Number of samples: PSP = 85, Control = 31)") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 346 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_psp_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 
## Warning: Removed 346 rows containing non-finite values (stat_boxplot).

2.6.3 AD vs PSP

psp_vs_ad_names <- dat_fc_pv %>%
  filter(abs(fc_ad_psp) >= fc_cut & ad_psp_pv <= pv_cut ) %>%
  arrange(-abs(fc_ad_psp)) %>%
  slice_head(n=25) %>%
  dplyr::select(ProteinIDs, PIDshort)


pva <- cra.log[psp_vs_ad_names$ProteinIDs, ]  %>% t %>% data.frame

## %>% t %>% data.frame

colnames(pva) <- psp_vs_ad_names$PIDshort

pva$condition <- NA

pva[control_cols, ]$condition <- "Control"
pva[ad_cols, ]$condition <- "AD"
pva[psp_cols, ]$condition <- "PSP"


pva %>% filter(condition != "Control") %>%
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  ggplot(aes(protein, level, fill=condition)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("PSP vs AD top 25 significant proteins (Number of samples: AD = 84, PSP = 85)") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
## Warning: Removed 392 rows containing non-finite values (stat_boxplot).
ggsave("./figs_prot/boxplot_ad_vs_psp.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 
## Warning: Removed 392 rows containing non-finite values (stat_boxplot).

d 167 rows containing non-finite values (stat_boxplot).

3 Significantly DE transcripts

3.1 Data

transcripts <- read.delim("./data_transcr/MayoRNAseq_RNAseq_TCX_transcriptCounts.tsv",
                          stringsAsFactors = FALSE) %>%
  `rownames<-`(.$ensembl_id)

ensembl_id <- data.frame(transcripts[1])

transcripts <- transcripts[-1] 

We form a table with the names of gene samples, RNA and diagnosis.

## transcripts_norm %>% names %>% sub("^.+_([0-9]+)_.+", "\\1", .)
names(transcripts) <- transcripts %>%
  names %>%
  sub("^X([0-9]+_.+)", "\\1", .)

id_keys_raw <- read.csv("./data_transcr/Mayo_Proteomics_ID_key.csv",
                        strip.white = TRUE,
                        header = TRUE,
                        stringsAsFactors = FALSE) %>%
  na.omit %>%
  dplyr::select(1:2)

id_keys <- read.csv("./data_transcr/Mayo_Proteomics_TC_traits.csv",
                    strip.white = TRUE,
                    header = TRUE,
                    stringsAsFactors = FALSE) %>%
  slice_head(n=200) %>%
  dplyr::select(c(1,4)) %>%
  cbind.data.frame(., id_keys_raw) %>%
  dplyr::select(c(1,4,2)) %>%
  filter(Samples_Simple != "b1_091_20b")

rm(id_keys_raw) 
samples_in_common <- intersect(names(transcripts),
                               id_keys$RNA_SampleID) 

We leave only lines with samples in common.

id_keys <- id_keys %>%
  filter(RNA_SampleID %in% samples_in_common) 

Selecting only relevant columns from the data.

transcripts <- transcripts %>%
  dplyr::select(all_of(id_keys$RNA_SampleID)) 

We get and save the column numbers for each of the conditions (AD, PSP, Control).

control_cols <- id_keys %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "Control") %>%
  .$n

ad_cols <- id_keys %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "AD") %>%
  .$n

psp_cols <- id_keys %>%
  mutate(n = rownames(.) %>% as.numeric) %>%
  filter(Diagnosis == "PSP") %>%
  .$n 

3.2 Filtering to remove lowly expressed genes

Genes with very low counts across all libraries provide little evidence for differential expression.

To filter out lowly expressed genes, we filtering on a minimum counts per million threshold present in at least 31 samples. 31 represents the smallest sample size (in Control group) for each group in our experiment. We choose to retain genes if they are expressed at a counts-per-million (CPM) above 1 in at least 31 samples.

We’ll use the cpm function from the edgeR library to generate the CPM values. By converting to CPMs we are normalising for the different sequencing depths for each sample.

# Obtain CPMs
transcripts_cpm <- cpm(transcripts)
# Have a look at the output
head(transcripts_cpm)[,1:5] 
##                 7095_TCX 1019_TCX  1137_TCX  1036_TCX 1010_TCX
## ENST00000000233 28.24212 56.10630 105.82034  67.82287 67.92063
## ENST00000000412  2.14055  4.98927   4.44295   4.16509  4.36092
## ENST00000000442  0.08737  0.09183   0.09006   0.12015  0.10445
## ENST00000001008 19.28677 91.76579  92.88174 140.55173 85.91268
## ENST00000001146  2.03134  9.15209  33.74243  18.54266  7.78176
## ENST00000002125  0.00000  0.06122   0.09006   0.04005  0.02611

A threshold of 1 CPM in at least minimum group sample size is a good rule of thumb.

# Which values in myCPM are greater than 0.5?
thresh <- transcripts_cpm > 1 
# This produces a logical matrix with TRUEs and FALSEs
head(thresh)[,1:5] 
##                 7095_TCX 1019_TCX 1137_TCX 1036_TCX 1010_TCX
## ENST00000000233     TRUE     TRUE     TRUE     TRUE     TRUE
## ENST00000000412     TRUE     TRUE     TRUE     TRUE     TRUE
## ENST00000000442    FALSE    FALSE    FALSE    FALSE    FALSE
## ENST00000001008     TRUE     TRUE     TRUE     TRUE     TRUE
## ENST00000001146     TRUE     TRUE     TRUE     TRUE     TRUE
## ENST00000002125    FALSE    FALSE    FALSE    FALSE    FALSE
# Summary of how many TRUEs there are in each row
# There are 11433 genes that have TRUEs in all 12 samples.
table(rowSums(thresh)) 
## 
##      0      1      2      3      4      5      6      7      8      9     10 
## 137246   5138   1747   1204    858    715    602    515    458    398    390 
##     11     12     13     14     15     16     17     18     19     20     21 
##    365    346    331    290    248    299    237    219    246    223    190 
##     22     23     24     25     26     27     28     29     30     31     32 
##    209    184    209    166    188    186    162    171    141    166    142 
##     33     34     35     36     37     38     39     40     41     42     43 
##    117    143    134    142    126    143    123    136    134    141    115 
##     44     45     46     47     48     49     50     51     52     53     54 
##    131    142    122    113    104    101    105     93    125    106    120 
##     55     56     57     58     59     60     61     62     63     64     65 
##    100     94     99    113     97    107    101     86    118    106    100 
##     66     67     68     69     70     71     72     73     74     75     76 
##    111    109     84     86    101    106     79     88    103     94     94 
##     77     78     79     80     81     82     83     84     85     86     87 
##     90     93    105     94     80     98     91    114     90     92     89 
##     88     89     90     91     92     93     94     95     96     97     98 
##     90     86    106     91     91     81     96    102     93     81     86 
##     99    100    101    102    103    104    105    106    107    108    109 
##     89     81    102     88    106     75     90     79     93     86    101 
##    110    111    112    113    114    115    116    117    118    119    120 
##     96     78     94     86     89     85     75     95     96     75    110 
##    121    122    123    124    125    126    127    128    129    130    131 
##     82     95     90     95     82     80     88     94    100    102    109 
##    132    133    134    135    136    137    138    139    140    141    142 
##     97     94     99    110     98     93     92     96    107    118     95 
##    143    144    145    146    147    148    149    150    151    152    153 
##    121    133    100    106    117    102    113    118    112    135    115 
##    154    155    156    157    158    159    160    161    162    163    164 
##    131    120    124    120    126    139    135    135    141    132    147 
##    165    166    167    168    169    170    171    172    173    174    175 
##    142    190    156    179    183    201    176    191    194    211    236 
##    176    177    178    179    180    181    182    183    184    185    186 
##    251    243    269    256    292    325    340    408    396    421    464 
##    187    188    189    190    191    192    193    194    195    196    197 
##    517    532    644    656    795    930   1063   1313   2013   3445  22685
# we would like to keep genes that have at least 31 TRUES in each row of thresh
keep <- rowSums(thresh) >= 31
# Subset the rows of countdata to keep the more highly expressed genes
transcripts_keep <- transcripts[keep,]
summary(keep) 
##    Mode   FALSE    TRUE 
## logical  153881   54363
dim(transcripts_keep) 
## [1] 54363   197
# Let's have a look and see whether our threshold of 1 does indeed correspond to a count of about 30-40
# We will look at the first sample
## plot(transcripts_cpm[,1],transcripts[,1],
##      ylim=c(0,50),xlim=c(0,3))
## abline(v=1) 

3.3 Convert counts to DGEList object

Next we’ll create a DGEList object. This is an object used by edgeR to store count data. It has a number of slots for storing various parameters about the data.

dgeObj <- DGEList(transcripts_keep)
# have a look at dgeObj
dgeObj[,1:5] 
## An object of class "DGEList"
## $counts
##                 7095_TCX 1019_TCX 1137_TCX 1036_TCX 1010_TCX
## ENST00000000233     1293     1833     3525     3387     2601
## ENST00000000412       98      163      148      208      167
## ENST00000001008      883     2998     3094     7019     3290
## ENST00000001146       93      299     1124      926      298
## ENST00000002165       91      556      435      918      758
## 54358 more rows ...
## 
## $samples
##          group lib.size norm.factors
## 7095_TCX     1 45550775            1
## 1019_TCX     1 31991405            1
## 1137_TCX     1 32772147            1
## 1036_TCX     1 49196988            1
## 1010_TCX     1 37712160            1

3.4 Quality control

We can look at a few different plots to check that the data is good quality.

3.4.1 Library sizes and distribution plots

Check how many reads we have for each sample in the dgeObj.

dgeObj$samples$lib.size 
##   [1] 45550775 31991405 32772147 49196988 37712160 30090126 35546996 66964691
##   [9] 37161623 36941520 52712105 47641750 40568238 34014083 35280111 39318949
##  [17] 42250545 40768654 43110918 35437439 38756019 36919775 39396416 37576727
##  [25] 53457952 39096885 48610519 48500575 47069216 38149904 48966776 65230041
##  [33] 40866245 39182880 36113104 55386798 36813570 41440955 41602120 30661837
##  [41] 39852497 44509078 37017685 34774158 43782636 52191604 44053451 53062467
##  [49] 71336831 53478854 40339548 38057041 40970127 41058288 41986088 40102188
##  [57] 44425361 34548532 33678531 30455478 36325005 37520429 45039493 35615322
##  [65] 52198010 40447856 40235904 61684913 50005587 37795709 45376671 38364056
##  [73] 37013042 86669819 50845067 38775710 38862319 54822972 33099333 44024426
##  [81] 47236330 38231326 40137081 32839797 39832435 44608859 44001175 41780728
##  [89] 50979762 48105520 40766437 37050055 50424532 40393128 37745480 33890328
##  [97] 46091083 43491393 59223517 42756890 42241034 51544473 49324578 42474170
## [105] 40672703 41635851 40868376 33838611 40849880 36383401 52890008 37009451
## [113] 57004673 43353962 42141625 57860670 37061874 43585299 38661440 43606840
## [121] 53672440 36449713 67021400 36207240 43826555 45620641 44653077 35162204
## [129] 35296146 31671760 40000259 54625935 37062573 31831658 39451368 33574059
## [137] 39877799 35192796 36215395 37830907 37437957 41985777 38319939 40262098
## [145] 47492240 59190811 52264681 37924736 46373214 42023695 31880881 52914487
## [153] 41690501 42190452 55767469 39637985 46754624 59240719 39735264 36314715
## [161] 39302145 44735347 35900319 45308074 39357966 40745343 36977404 48982614
## [169] 42860677 33780491 46846071 50115573 40692380 58390293 43829550 34106422
## [177] 33802029 48618773 37992006 47333444 46674794 55557091 35428577 39737316
## [185] 52865317 43324852 42344695 43715703 33518065 44840057 41127482 38817917
## [193] 54159083 64033266 47061409 35876223 47488721

Plot the library sizes as a barplot to see whether there are any major discrepanies between the samples more easily.

png("./figs_transcr/barplot_sample_sizes.png", width = 2000, height = 2000, res = 300)
# The names argument tells the barplot to use the sample names on the x-axis
# The las argument rotates the axis names
barplot(dgeObj$samples$lib.size, names=colnames(dgeObj), las=2, cex.names = .5)
# Add a title to the plot
## title("Barplot of library sizes")
title(paste("Barplot of library sizes\n", "Number of samples: AD = 84, PSP = 85, Control = 31"))
dev.off() 
## png 
##   2

Count data is not normally distributed, so if we want to examine the distributions of the raw counts we need to log the counts. Next we’ll use box plots to check the distribution of the read counts on the log2 scale. We can use the cpm function to get log2 counts per million, which are corrected for the different library sizes. The cpm function also adds a small offset to avoid taking log of zero.

# Get log2 counts per million
logcounts <- cpm(dgeObj, log = TRUE)
# Check distributions of samples using boxplot
png("./figs_transcr/boxplot_samples_unnorm.png", width = 4000, height = 2000, res = 300)
boxplot(logcounts, xlab="", ylab="Log2 counts per million", las=2, cex.axis=.4, outcex=.1)
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(logcounts),col="blue")
title("Boxplots of logCPMs (unnormalised). Number of samples: AD = 82, PSP = 84, Control = 31")
dev.off() 
## png 
##   2

From the boxplots we see that overall the density distributions of raw log-intensities are not identical but still not very different.

3.4.2 Multidimensional scaling plots

We use the plotMDS function to create the MDS plot. We colour the samples according to the grouping information.

levels(id_keys$Diagnosis %>% as.factor) 
## [1] "AD"      "Control" "PSP"
col.condition <- c("red", "green", "purple")[id_keys$Diagnosis %>% as.factor] 
png("./figs_transcr/mds_plot.png", width = 2000, height = 2000, res = 300)
plotMDS(dgeObj, col=col.condition)
legend("topleft", fill=c("red", "green", "purple"), legend = levels(id_keys$Diagnosis %>% as.factor))
# Add a title
title("MDS plot. Number of samples: AD = 82, PSP = 84, Control = 31")
dev.off() 
## png 
##   2

3.5 Normalisation for composition bias

The trimmed mean of M-values normalization method (TMM) is performed to eliminate composition biases between libraries. This generates a set of normalization factors, where the product of these factors and the library sizes defines the effective library size. The calcNormFactors function in edgeR calculates the normalization factors between libraries. TMM normalisation (and most scaling normalisation methods) scale relative to one sample.

# Apply normalisation to DGEList object
dgeObj <- calcNormFactors(dgeObj) 

This will update the normalisation factors in the DGEList object (their default values are 1).

dgeObj$samples %>% head 
##          group lib.size norm.factors
## 7095_TCX     1 45550775       0.2836
## 1019_TCX     1 31991405       1.1594
## 1137_TCX     1 32772147       1.0841
## 1036_TCX     1 49196988       1.0406
## 1010_TCX     1 37712160       1.0552
## 871_TCX      1 30090126       1.0234

The normalization factors multiply to unity across all libraries. A normalization factor below one indicates that the library size will be scaled down, as there is more suppression (i.e., composition bias) in that library relative to the other libraries. This is also equivalent to scaling the counts upwards in that sample. Conversely, a factor above one scales up the library size and is equivalent to downscaling the counts.

The first two samples have very different normalisation factors. If we plot mean difference plots using the plotMD function for these samples, we see the composition bias problem. We will use the logcounts, which have been normalised for library size, but not for composition bias.

png("./figs_transcr/mean_difference.png", width = 2000, height = 2000, res = 300)
par(mfrow=c(1,2))
plotMD(logcounts, column = 1)
abline(h=0,col="grey")
plotMD(logcounts,column = 2)
abline(h=0,col="grey")
dev.off() 
## png 
##   2

The mean-difference plots show average expression (mean: x-axis) against log-fold-changes (difference: y-axis). Because our DGEList object contains the normalisation factors, plots using dgeObj, show the composition bias problem has been solved.

png("./figs_transcr/mean_difference_2.png", width = 2000, height = 2000, res = 300)
par(mfrow=c(1,2))
plotMD(dgeObj,column = 1)
abline(h=0,col="grey")
plotMD(dgeObj,column = 2)
abline(h=0,col="grey")
dev.off() 
## png 
##   2

## save(group, dgeObj,sampleinfo,file="./Robjects/preprocessing.Rdata") 

3.6 Differential expression with edgeR

3.6.1 Create the design matrix

diagnosis <- as.character(id_keys$Diagnosis)
design <- model.matrix( ~ diagnosis + 0) 
## plotMDS(dgeObj, labels=diagnosis, cex=0.75, xlim=c(-4, 5)) 

3.6.2 Data exploration

The common dispersion estimates the overall BCV of the dataset, averaged over all genes:

dgeObj <- estimateCommonDisp(dgeObj) 

Then we estimate gene-wise dispersion estimates, allowing a possible trend with averge count size:

dgeObj <- estimateGLMTrendedDisp(dgeObj)
dgeObj <- estimateTagwiseDisp(dgeObj) 

Plot the estimated dispersions:

png("./figs_transcr/est_dispersion.png", width = 2000, height = 2000, res = 300)
plotBCV(dgeObj)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.01, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

3.6.3 Testing for differential expression

First, we fit genewise glms:

# Fit the linear model
fit <- glmFit(dgeObj, design)
names(fit) 
##  [1] "coefficients"          "fitted.values"         "deviance"             
##  [4] "method"                "counts"                "unshrunk.coefficients"
##  [7] "df.residual"           "design"                "offset"               
## [10] "dispersion"            "prior.count"           "samples"              
## [13] "prior.df"              "AveLogCPM"
head(coef(fit)) 
##                 diagnosisAD diagnosisControl diagnosisPSP
## ENST00000000233      -9.658           -9.327       -9.516
## ENST00000000412     -12.336          -12.295      -12.362
## ENST00000001008      -9.144           -9.235       -8.912
## ENST00000001146     -11.217          -11.193      -10.772
## ENST00000002165     -10.919          -11.137      -11.082
## ENST00000002596     -12.246          -11.869      -12.035

Conduct likelihood ratio tests for luminal vs basal and show the top genes:

3.6.4 Significantly DE transcripts: AD vs Control

Build a contrast AD vs Control

AvsC <- makeContrasts(diagnosisAD - diagnosisControl, levels=design) 
lrt.AvsC <- glmLRT(fit, contrast=AvsC)
## topTags(lrt.AvsC, sort.by = "logFC", p.value = 0.05, n = 61) 

Save list of gene IDs for transcripts.

## gene_ids_tr <- lrt.AvsC %>%
##   rownames %>%
##   gconvert(organism="hsapiens", target="ENSG", filter_na = FALSE) %>%
##   dplyr::select("GeneID" = name)

## saveRDS(gene_ids_tr, "./results_transcr/gene_ids_tr.rds")

gene_ids_tr <- readRDS("./results_transcr/gene_ids_tr.rds") 
## Set log-fold cutoff for transcripts
fc_cut_tr <- 2

ad_vs_control_top_tr  <- topTags(lrt.AvsC, sort.by = "logFC", p.value = 0.05, n = 100) %>%
  data.frame %>%
  filter(abs(logFC) >= fc_cut_tr) %>%
  rownames_to_column(var = "TranscriptIDs")

AD_vs_Control_GeneIDs <- ad_vs_control_top_tr$TranscriptIDs %>%
  noquote %>%
  gconvert(organism="hsapiens", target="ENSG") %>%
  dplyr::select("GeneID" = name)
## write.csv(ad_vs_control_prot, "./results/prot_ad_vs_control.csv")

write.csv(data.frame(AD_vs_Control_GeneIDs, ad_vs_control_top_tr),
          "./results_transcr/ad_vs_control_tr.csv")

knitr::kable(data.frame(AD_vs_Control_GeneIDs, ad_vs_control_top_tr),
             caption = "Top DE transcripts: AD vs Control") 
Table 3.1: Top DE transcripts: AD vs Control
GeneID TranscriptIDs logFC logCPM LR PValue FDR
FER1L6 ENST00000522917 3.731 0.6279 29.43 0 0
MTCO2P12 ENST00000427426 -3.572 3.5340 80.84 0 0
BMP5 ENST00000370830 3.521 1.1730 33.40 0 0
SCARA5 ENST00000380385 3.452 0.6688 34.61 0 0
SLC47A1 ENST00000436810 3.397 0.6401 41.97 0 0
CD177 ENST00000618265 3.392 0.3137 43.55 0 0
FAT2 ENST00000261800 -3.386 1.1699 89.19 0 0
SCARA5 ENST00000354914 3.322 1.7634 32.97 0 0
OGN ENST00000262551 3.223 3.3344 31.91 0 0
SLPI ENST00000338380 2.955 2.3328 25.58 0 0
CYP1B1 ENST00000494864 2.890 1.0422 48.74 0 0
IL1R2 ENST00000332549 2.849 -0.7876 34.76 0 0
CD163 ENST00000396620 2.833 -0.2048 30.21 0 0
SIX2 ENST00000303077 2.786 0.0841 28.12 0 0
CYP1B1 ENST00000614273 2.748 -0.1752 45.53 0 0
HSPA6 ENST00000309758 -2.714 3.6476 32.50 0 0
STEAP4 ENST00000380079 2.674 1.0209 33.91 0 0
GRM4 ENST00000609973 -2.643 0.3437 69.93 0 0
DSP ENST00000379802 2.639 3.2340 40.10 0 0
CBLN3 ENST00000267406 -2.628 2.2207 82.78 0 0
IGFBP5 ENST00000449583 2.569 1.1627 54.59 0 0
SLC13A4 ENST00000491630 2.544 -0.0148 21.67 0 0
IGFBP5 ENST00000486341 2.510 0.5922 53.62 0 0
RGS1 ENST00000474373 2.503 0.2359 26.97 0 0
S100A12 ENST00000368737 2.486 0.3891 26.14 0 0
SELE ENST00000367775 -2.446 0.9562 32.06 0 0
SERPIND1 ENST00000215727 2.429 0.2412 35.80 0 0
FMOD ENST00000493296 2.425 -0.1835 38.60 0 0
GRM4 ENST00000535756 -2.408 2.8107 76.04 0 0
ZIC4 ENST00000472749 -2.400 0.6030 52.99 0 0
CD163 ENST00000539632 2.358 0.1737 23.29 0 0
CD163 ENST00000538840 2.358 0.1730 23.27 0 0
CD44 ENST00000278386 2.302 1.4183 34.36 0 0
DSP ENST00000418664 2.298 2.6612 34.63 0 0
AC093330.1 ENST00000577364 -2.261 2.4077 73.70 0 0
CD44 ENST00000526669 2.236 0.6769 32.71 0 0
ZIC4 ENST00000383075 -2.212 0.9448 47.38 0 0
VNN2 ENST00000418593 2.206 -0.6440 36.21 0 0
CBLN1 ENST00000219197 -2.204 2.5834 68.32 0 0
KRT31 ENST00000251645 -2.189 0.1656 67.92 0 0
MPZL2 ENST00000527282 2.185 -0.5619 37.78 0 0
TMEM30B ENST00000555868 2.177 0.7573 24.04 0 0
MTRNR2L8 ENST00000536684 -2.175 1.8293 71.31 0 0
SLC16A12 ENST00000371790 2.158 0.4949 28.82 0 0
EMP1 ENST00000542289 2.154 1.5059 49.70 0 0
NDNF ENST00000379692 2.123 0.8420 45.68 0 0
PFKP ENST00000460445 2.116 0.0379 51.21 0 0
AQP4 ENST00000578776 2.110 3.5511 50.64 0 0
AQP4 ENST00000383170 2.097 4.1627 50.88 0 0
EMP1 ENST00000431267 2.093 0.3996 48.27 0 0
DSG2 ENST00000261590 2.090 -0.1590 26.88 0 0
PHLDB2 ENST00000393923 2.086 2.2024 40.22 0 0
FCGR3B ENST00000367964 2.074 -0.7721 31.47 0 0
LEPR ENST00000371059 2.060 2.4428 41.95 0 0
CD163 ENST00000359156 2.051 4.0504 20.36 0 0
SERPINA5 ENST00000553780 2.051 0.1347 31.61 0 0
AQP4 ENST00000584088 2.048 1.6928 50.14 0 0
SPTLC3 ENST00000399002 2.047 2.0217 27.60 0 0
NPNT ENST00000514837 2.037 0.4600 62.83 0 0
FAM20A ENST00000226094 2.033 0.6134 28.98 0 0
CD44 ENST00000434472 2.012 -0.0146 29.46 0 0

We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.

all_samples_ad_vs_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(ad_cols)) %>%
  `rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique) 

3.6.5 Significantly DE transcripts: PSP vs Control

Build a contrast PSP vs Control

PvsC <- makeContrasts(diagnosisPSP - diagnosisControl, levels=design) 
lrt.PvsC <- glmLRT(fit, contrast=PvsC)
## topTags(lrt.PvsC, sort.by = "logFC", p.value = 0.05, n = 25) 
## fold-change cutoff here is 1.9 to make the numger of transcripts
## close to the number of proteins
psp_vs_control_top_tr  <- topTags(lrt.PvsC, sort.by = "logFC", p.value = 0.05, n = 200) %>%
  data.frame %>%
  filter(abs(logFC) >= 1.9) %>%
  rownames_to_column(var = "TranscriptIDs")

PSP_vs_Control_GeneIDs <- psp_vs_control_top_tr$TranscriptIDs %>%
  noquote %>%
  gconvert(organism="hsapiens", target="ENSG") %>%
  dplyr::select("GeneID" = name)

write.csv(data.frame(PSP_vs_Control_GeneIDs, psp_vs_control_top_tr),
          "./results_transcr/psp_vs_control_tr.csv")

knitr::kable(data.frame(PSP_vs_Control_GeneIDs, psp_vs_control_top_tr),
             caption = "Top DE transcripts: PSP vs Control") 
Table 3.2: Top DE transcripts: PSP vs Control
GeneID TranscriptIDs logFC logCPM LR PValue FDR
COL3A1 ENST00000304636 3.856 2.3556 37.78 0 0e+00
TGFBI ENST00000442011 3.650 2.9260 37.31 0 0e+00
MARCO ENST00000327097 3.364 0.6494 31.49 0 0e+00
COL1A1 ENST00000225964 3.341 2.6313 32.87 0 0e+00
ZIC4 ENST00000472749 -3.216 0.6030 96.46 0 0e+00
ZIC4 ENST00000383075 -3.050 0.9448 91.79 0 0e+00
CD163 ENST00000396620 2.968 -0.2048 32.68 0 0e+00
HMOX1 ENST00000412893 2.921 2.2612 37.14 0 0e+00
FAT2 ENST00000261800 -2.784 1.1699 61.55 0 0e+00
SELE ENST00000367775 -2.766 0.9562 41.67 0 0e+00
RGS1 ENST00000474373 2.763 0.2359 31.89 0 0e+00
CBLN3 ENST00000267406 -2.664 2.2207 86.27 0 0e+00
IFI30 ENST00000597802 2.660 0.9885 30.06 0 0e+00
CD163 ENST00000539632 2.537 0.1737 26.42 0 0e+00
CD163 ENST00000538840 2.536 0.1730 26.40 0 0e+00
MSR1 ENST00000262101 2.523 1.5579 27.73 0 0e+00
ZIC1 ENST00000282928 -2.370 2.4326 69.10 0 0e+00
ZIC1 ENST00000472523 -2.317 2.6913 67.03 0 0e+00
GRM4 ENST00000609973 -2.307 0.3437 53.80 0 0e+00
COL6A3 ENST00000295550 2.280 0.8936 27.34 0 0e+00
AC093330.1 ENST00000577364 -2.210 2.4077 71.21 0 0e+00
GRM4 ENST00000535756 -2.152 2.8107 61.19 0 0e+00
CD163 ENST00000359156 2.141 4.0504 22.00 0 0e+00
LIF ENST00000249075 2.128 0.1906 17.03 0 1e-04
MTCO3P12 ENST00000416718 -2.094 2.8876 75.96 0 0e+00
CD68 ENST00000584502 2.074 0.8735 30.40 0 0e+00
AL359091.1 ENST00000608502 -2.023 4.2662 61.92 0 0e+00
MTND2P28 ENST00000457540 -2.010 8.5698 56.98 0 0e+00
KRT31 ENST00000251645 -2.005 0.1656 57.44 0 0e+00
MTRNR2L8 ENST00000536684 -1.992 1.8293 60.24 0 0e+00
HMOX1 ENST00000216117 1.981 4.0728 22.88 0 0e+00
CERCAM ENST00000613052 -1.975 3.8814 65.14 0 0e+00
MOBP ENST00000452959 -1.973 8.5285 64.12 0 0e+00
HBA2 ENST00000482565 -1.954 1.9636 49.47 0 0e+00
HBA2 ENST00000251595 -1.954 1.9635 49.47 0 0e+00
MTRNR2L12 ENST00000600213 -1.942 3.8243 63.04 0 0e+00
MBP ENST00000397865 -1.939 9.9753 63.93 0 0e+00
CBLN1 ENST00000219197 -1.937 2.5834 52.98 0 0e+00
ZIC1 ENST00000488404 -1.932 1.4645 47.00 0 0e+00
MOBP ENST00000424090 -1.931 4.6079 59.86 0 0e+00
MBP ENST00000397875 -1.931 9.9826 64.07 0 0e+00
MBP ENST00000490319 -1.922 10.0626 63.70 0 0e+00
MBP ENST00000585201 -1.905 10.0999 63.43 0 0e+00
AC026691.1 ENST00000602502 -1.902 2.6420 70.87 0 0e+00

We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.

all_samples_psp_vs_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(psp_cols)) %>%
  `rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique) 

3.6.6 Significantly DE transcripts: AD vs PSP

Build a contrast AD vs PSP

AvsP <- makeContrasts(diagnosisAD - diagnosisPSP, levels=design) 
lrt.AvsP <- glmLRT(fit, contrast=AvsP)
## topTags(lrt.AvsP, sort.by = "logFC", p.value = 0.05, n = 25) 
## fold-change cutoff here is 1 to make the numger of transcripts
## close to the number of proteins
ad_vs_psp_top_tr  <- topTags(lrt.AvsP, sort.by = "logFC", p.value = 0.05, n = 500) %>%
  data.frame %>%
  filter(abs(logFC) >= 1) %>%
  rownames_to_column(var = "TranscriptIDs")

AD_vs_PSP_GeneIDs <- ad_vs_psp_top_tr$TranscriptIDs %>%
  noquote %>%
  gconvert(organism="hsapiens", target="ENSG", filter_na = FALSE) %>%
  dplyr::select("GeneID" = name)
## write.csv(ad_vs_psp_prot, "./results/prot_ad_vs_psp.csv")

write.csv(data.frame(AD_vs_PSP_GeneIDs, ad_vs_psp_top_tr),
          "./results_transcr/ad_vs_psp_tr.csv")

knitr::kable(data.frame(AD_vs_PSP_GeneIDs, ad_vs_psp_top_tr),
             caption = "Top DE transcripts: AD vs PSP") 
Table 3.3: Top DE transcripts: AD vs PSP
GeneID TranscriptIDs logFC logCPM LR PValue FDR
AC091057.1 ENST00000602616 -2.265 1.6246 51.810 0.0000 0.0000
FER1L6 ENST00000522917 2.173 0.6279 27.169 0.0000 0.0000
TGFBI ENST00000442011 -2.163 2.9260 34.882 0.0000 0.0000
LTF ENST00000231751 2.057 0.4877 40.320 0.0000 0.0000
SCARA5 ENST00000354914 2.024 1.7634 31.963 0.0000 0.0000
SCARA5 ENST00000380385 1.989 0.6688 30.637 0.0000 0.0000
CD177 ENST00000618265 1.976 0.3137 39.434 0.0000 0.0000
COL1A1 ENST00000225964 -1.921 2.6313 28.277 0.0000 0.0000
COL3A1 ENST00000304636 -1.894 2.3556 25.416 0.0000 0.0000
HSPA6 ENST00000309758 -1.867 3.6476 22.707 0.0000 0.0000
S100A12 ENST00000368737 1.825 0.3891 33.761 0.0000 0.0000
BMP5 ENST00000370830 1.802 1.1730 23.794 0.0000 0.0000
MTCO2P12 ENST00000427426 -1.796 3.5340 30.611 0.0000 0.0000
SLC47A1 ENST00000436810 1.787 0.6401 31.157 0.0000 0.0000
HMOX1 ENST00000412893 -1.741 2.2612 32.929 0.0000 0.0000
IL1R2 ENST00000332549 1.685 -0.7876 31.052 0.0000 0.0000
MARCO ENST00000327097 -1.649 0.6494 20.184 0.0000 0.0001
IFI30 ENST00000597802 -1.633 0.9885 27.524 0.0000 0.0000
CXCL10 ENST00000306602 1.632 0.0206 26.084 0.0000 0.0000
SLPI ENST00000338380 1.617 2.3328 19.556 0.0000 0.0001
HMOX1 ENST00000216117 -1.564 4.0728 31.726 0.0000 0.0000
SIX2 ENST00000303077 1.549 0.0841 21.966 0.0000 0.0000
TMEM30B ENST00000555868 1.546 0.7573 28.188 0.0000 0.0000
OGN ENST00000262551 1.522 3.3344 18.888 0.0000 0.0001
STEAP4 ENST00000380079 1.506 1.0209 26.854 0.0000 0.0000
SPTLC3 ENST00000399002 1.478 2.0217 32.882 0.0000 0.0000
None ENST00000320354 1.474 0.2405 31.598 0.0000 0.0000
SFN ENST00000339276 1.469 0.8425 19.689 0.0000 0.0001
CLIC6 ENST00000360731 1.464 -0.0853 24.822 0.0000 0.0000
IFI30 ENST00000407280 -1.419 2.7254 26.052 0.0000 0.0000
CRH ENST00000276571 -1.417 0.6056 32.910 0.0000 0.0000
NPNT ENST00000379987 1.402 1.6386 68.243 0.0000 0.0000
DSP ENST00000379802 1.397 3.2340 27.846 0.0000 0.0000
SST ENST00000287641 -1.364 3.3812 42.191 0.0000 0.0000
ECM2 ENST00000444490 1.356 1.9907 40.702 0.0000 0.0000
SLC47A1 ENST00000497548 1.349 0.4482 34.871 0.0000 0.0000
NPNT ENST00000514837 1.347 0.4600 63.942 0.0000 0.0000
SLC47A1 ENST00000270570 1.337 0.5090 35.019 0.0000 0.0000
SLC47A1 ENST00000575023 1.335 0.4090 34.526 0.0000 0.0000
DSP ENST00000418664 1.331 2.6612 27.613 0.0000 0.0000
SLC22A6 ENST00000540654 1.329 -0.0151 31.742 0.0000 0.0000
SLC13A4 ENST00000491630 1.328 -0.0148 14.574 0.0001 0.0007
SLC47A1 ENST00000581558 1.327 0.3578 34.393 0.0000 0.0000
PIRT ENST00000580256 1.326 2.2798 40.411 0.0000 0.0000
PNOC ENST00000301908 -1.326 -0.3262 45.834 0.0000 0.0000
VGF ENST00000611537 -1.316 3.5267 49.392 0.0000 0.0000
IGF2 ENST00000381395 1.305 3.2840 30.428 0.0000 0.0000
None ENST00000611099 1.303 3.0742 35.441 0.0000 0.0000
VGF ENST00000249330 -1.298 3.5811 49.119 0.0000 0.0000
None ENST00000300632 1.296 3.1619 30.406 0.0000 0.0000
NPNT ENST00000305572 1.291 0.9332 70.545 0.0000 0.0000
AL355974.1 ENST00000618896 1.286 -0.5409 31.485 0.0000 0.0000
ECM2 ENST00000344604 1.283 0.2427 41.634 0.0000 0.0000
CPXM2 ENST00000368854 1.279 0.8488 34.869 0.0000 0.0000
VGF ENST00000445482 -1.276 3.4696 48.634 0.0000 0.0000
IGFBP5 ENST00000449583 1.275 1.1627 33.417 0.0000 0.0000
PHLDB2 ENST00000393923 1.271 2.2024 34.653 0.0000 0.0000
PLIN1 ENST00000300055 1.267 1.5933 60.119 0.0000 0.0000
NPNT ENST00000505917 1.258 1.0131 69.889 0.0000 0.0000
APLNR ENST00000257254 1.246 5.0432 33.802 0.0000 0.0000
APLNR ENST00000606794 1.246 5.0432 33.789 0.0000 0.0000
SLC16A12 ENST00000371790 1.244 0.4949 22.570 0.0000 0.0000
IGFBP5 ENST00000486341 1.242 0.5922 32.537 0.0000 0.0000
RASSF9 ENST00000361228 1.242 -0.5991 32.997 0.0000 0.0000
LEPR ENST00000371059 1.236 2.4428 35.041 0.0000 0.0000
FCGBP ENST00000616721 -1.236 4.5363 20.114 0.0000 0.0001
TXNIP ENST00000488537 1.233 4.8761 56.977 0.0000 0.0000
TBX15 ENST00000207157 1.228 -0.0383 40.492 0.0000 0.0000
ZIC1 ENST00000488404 1.210 1.4645 28.567 0.0000 0.0000
VNN2 ENST00000418593 1.195 -0.6440 25.795 0.0000 0.0000
THSD4 ENST00000357769 1.181 -0.1266 36.314 0.0000 0.0000
DSG2 ENST00000261590 1.176 -0.1590 19.958 0.0000 0.0001
TXNIP ENST00000486597 1.174 2.9089 53.940 0.0000 0.0000
CP ENST00000264613 1.168 -0.3037 27.227 0.0000 0.0000
TNFRSF11B ENST00000297350 1.167 2.4486 35.359 0.0000 0.0000
PLIN1 ENST00000560330 1.166 0.5525 58.699 0.0000 0.0000
FGL2 ENST00000248598 1.159 3.5594 28.418 0.0000 0.0000
THSD4 ENST00000261862 1.155 3.0478 38.126 0.0000 0.0000
PHLDB2 ENST00000481953 1.154 0.1905 32.039 0.0000 0.0000
THSD4 ENST00000355327 1.150 3.1585 37.826 0.0000 0.0000
TXNIP ENST00000582401 1.142 7.2801 54.803 0.0000 0.0000
VCAM1 ENST00000370119 1.133 1.4273 22.377 0.0000 0.0000
IL1RL1 ENST00000311734 1.133 1.2369 15.743 0.0001 0.0004
None ENST00000406510 1.122 2.8575 35.130 0.0000 0.0000
ZIC1 ENST00000472523 1.119 2.6913 24.037 0.0000 0.0000
PLIN1 ENST00000430628 1.117 0.2383 56.023 0.0000 0.0000
BNC2 ENST00000380672 1.117 0.6369 30.048 0.0000 0.0000
ZIC1 ENST00000282928 1.109 2.4326 23.252 0.0000 0.0000
OLFML2A ENST00000373580 1.103 1.6315 36.271 0.0000 0.0000
IL1RL1 ENST00000404917 1.103 1.5676 14.986 0.0001 0.0006
FOXD1 ENST00000615637 1.102 -0.2919 36.913 0.0000 0.0000
SLC7A2 ENST00000494857 1.101 5.2677 38.420 0.0000 0.0000
SLC6A20 ENST00000353278 1.099 1.2855 23.896 0.0000 0.0000
LEPR ENST00000616738 1.095 1.1400 34.119 0.0000 0.0000
OLFML2A ENST00000288815 1.094 1.5563 36.263 0.0000 0.0000
IL1RL1 ENST00000409584 1.084 1.0576 14.087 0.0002 0.0008
CLEC4E ENST00000299663 1.083 -0.1953 26.982 0.0000 0.0000
ANGPT2 ENST00000523120 1.083 0.0113 26.545 0.0000 0.0000
CYP1B1 ENST00000494864 1.080 1.0422 17.790 0.0000 0.0002
AC139426.1 ENST00000568218 -1.072 -0.7013 6.267 0.0123 0.0282
S100A8 ENST00000477801 1.071 2.0185 17.906 0.0000 0.0002
LEPR ENST00000371060 1.069 1.1811 34.119 0.0000 0.0000
SLC26A2 ENST00000286298 1.068 3.7736 37.417 0.0000 0.0000
LINC02192 ENST00000570024 -1.063 0.5860 31.730 0.0000 0.0000
AL390755.1 ENST00000614099 1.060 0.7093 33.202 0.0000 0.0000
SLC6A20 ENST00000456124 1.060 0.1308 24.563 0.0000 0.0000
CYP1B1 ENST00000614273 1.058 -0.1752 17.525 0.0000 0.0002
GJB2 ENST00000382844 1.058 1.3173 19.354 0.0000 0.0001
GALNT15 ENST00000430410 1.058 -0.4972 44.634 0.0000 0.0000
KRT5 ENST00000252242 -1.056 -0.4930 27.619 0.0000 0.0000
GJB2 ENST00000382848 1.056 1.3846 19.097 0.0000 0.0001
SLC26A2 ENST00000503336 1.054 3.7111 37.184 0.0000 0.0000
COL24A1 ENST00000370571 -1.052 3.2431 51.189 0.0000 0.0000
S100A8 ENST00000368732 1.050 2.4470 17.775 0.0000 0.0002
None ENST00000380874 1.048 3.4435 42.039 0.0000 0.0000
MMRN1 ENST00000394980 1.042 1.2118 15.916 0.0001 0.0004
ANGPT2 ENST00000338312 1.042 0.1032 24.425 0.0000 0.0000
CP ENST00000481169 1.034 4.4195 26.592 0.0000 0.0000
SERTM1 ENST00000315190 -1.030 3.4243 35.456 0.0000 0.0000
FRMPD2 ENST00000474573 -1.028 0.8971 35.413 0.0000 0.0000
CXCL8 ENST00000307407 -1.028 1.4163 12.903 0.0003 0.0014
FAM20A ENST00000226094 1.027 0.6134 17.291 0.0000 0.0002
IL1RL1 ENST00000233954 1.026 2.4945 12.780 0.0004 0.0015
COL24A1 ENST00000426639 -1.025 -0.4053 48.578 0.0000 0.0000
PGR ENST00000325455 1.024 1.5546 42.491 0.0000 0.0000
SLC7A2 ENST00000004531 1.022 4.8790 37.049 0.0000 0.0000
CP ENST00000473296 1.022 -0.5519 24.944 0.0000 0.0000
IL1RL1 ENST00000463990 1.020 -0.5689 12.798 0.0003 0.0014
AL390755.1 ENST00000617298 1.018 0.5406 30.927 0.0000 0.0000
KLF4 ENST00000493306 1.017 0.2475 34.246 0.0000 0.0000
IL1RL1 ENST00000427077 1.015 -0.7783 13.490 0.0002 0.0011
EMP1 ENST00000542289 1.014 1.5059 26.247 0.0000 0.0000
LIPH ENST00000296252 -1.014 0.1836 71.546 0.0000 0.0000
LPAR4 ENST00000435339 1.014 -0.4470 31.699 0.0000 0.0000
EMP1 ENST00000431267 1.013 0.3996 26.999 0.0000 0.0000
AP001993.1 ENST00000527317 -1.013 -0.2069 34.277 0.0000 0.0000
LINC01007 ENST00000437900 -1.013 1.0189 31.519 0.0000 0.0000
LINC01007 ENST00000434537 -1.013 1.0189 31.519 0.0000 0.0000
None ENST00000431497 -1.011 0.8876 37.144 0.0000 0.0000
DLX6-AS1 ENST00000452769 -1.009 0.9233 37.165 0.0000 0.0000
IL18R1 ENST00000410040 1.008 0.1367 24.146 0.0000 0.0000
GIMAP6 ENST00000493969 1.008 -0.5940 42.095 0.0000 0.0000
DLX6-AS1 ENST00000430027 -1.006 3.3202 37.706 0.0000 0.0000

We get normalized data with cpm(dgeObj, normalized.lib.sizes = TRUE) for the top AD vs Control transcripts.

all_samples_ad_vs_psp_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(ad_cols)) %>%
  `rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique) 

3.6.7 Save lists of DE transcripts to Excel

write.xlsx(all_samples_ad_vs_control_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "all_samples_ad_vs_control_tr", append = TRUE)
write.xlsx(all_samples_ad_vs_psp_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "all_samples_ad_vs_psp_tr", append = TRUE)
write.xlsx(all_samples_psp_vs_control_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "all_samples_psp_vs_control_tr", append = TRUE)
write.xlsx(ad_vs_control_top_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "ad_vs_control_top_tr", append = TRUE)
write.xlsx(psp_vs_control_top_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "psp_vs_control_top_tr", append = TRUE)
write.xlsx(ad_vs_psp_top_tr, "./results_transcr/transcriptsdata.xlsx",
           sheetName = "ad_vs_psp_top_tr", append = TRUE) 

3.7 Clustering and heatmaps

3.7.1 Clustering AD-Control

Prepare an object for clustering (gather AD and Control samples in one data frame).

all_samples_ad_and_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(ad_cols, control_cols))) %>%
  `colnames<-`(c(make.unique(rep("AD", length(ad_cols))),
                 make.unique(rep("Control", length(control_cols))))) %>%
  `rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique) 

Transform to matrix

## data.matrix() leaves numeric data as is.
ad_tr_matrix <- data.matrix(all_samples_ad_and_control_tr)

## rownames(ad_tr_matrix) <- dat_filt_ad$PIDshort
## ad_tr_matrix <- ad_tr_matrix[,-1] 
ad_tr_scaled <- ad_tr_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: AD & Control

ad_dist_sampl <- ad_tr_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: AD & Control

ad_dist_tr <- ad_tr_scaled %>% 
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

ad_hclust_sampl <- hclust(ad_dist_sampl, method = "ward.D2", members = NULL)
ad_hclust_tr <- hclust(ad_dist_tr, method = "ward.D2", members = NULL)


png("./figs_transcr/hclust_ad.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: AD = 82, Control = 31")
ad_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: AD = 82, Control = 31")
dev.off() 
## png 
##   2

3.7.2 Heatmap AD-Control

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

png("./figs_transcr/heatmap_ad.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_tr_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
                    Colv=as.dendrogram(ad_hclust_sampl),     # Experiments clusters in cols
                    Rowv=as.dendrogram(ad_hclust_tr),     # Protein clusters in rows
                    revC=TRUE,                  # Flip plot to match pheatmap
                    density.info="histogram",   # Plot histogram of data and colour key
                    trace="none",               # Turn of trace lines from heat map
                    col = my_palette,           # Use my colour scheme
                    cexRow=0.3, cexCol=0.2,     # Amend row and column label fonts
                    xlab = paste0("Number of samples: AD = ", length(ad_cols), ", Control = ", length(control_cols))
                    )
dev.off() 
## png 
##   2

3.7.3 Clustering PSP-Control

Prepare an object for clustering (gather AD and Control samples in one data frame).

all_samples_psp_and_control_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(psp_cols, control_cols))) %>%
  `colnames<-`(c(make.unique(rep("PSP", length(ad_cols))),
                 make.unique(rep("Control", length(control_cols))))) %>%
  `rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique) 

Transform to matrix

## data.matrix() leaves numeric data as is.
psp_tr_matrix <- data.matrix(all_samples_psp_and_control_tr) 
psp_tr_scaled <- psp_tr_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: PSP & Control

psp_dist_sampl <- psp_tr_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: PSP & Control

psp_dist_tr <- psp_tr_scaled %>% 
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

psp_hclust_sampl <- hclust(psp_dist_sampl, method = "ward.D2", members = NULL)
psp_hclust_tr <- hclust(psp_dist_tr, method = "ward.D2", members = NULL)


png("./figs_transcr/hclust_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
psp_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: PSP = 84, Control = 31")
psp_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: PSP = 84, Control = 31")
dev.off() 
## png 
##   2

3.7.4 Heatmap PSP-Control

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

png("./figs_transcr/heatmap_psp.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
psp_tr_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
                    Colv=as.dendrogram(psp_hclust_sampl),     # Experiments clusters in cols
                    Rowv=as.dendrogram(psp_hclust_tr),     # Protein clusters in rows
                    revC=TRUE,                  # Flip plot to match pheatmap
                    density.info="histogram",   # Plot histogram of data and colour key
                    trace="none",               # Turn of trace lines from heat map
                    col = my_palette,           # Use my colour scheme
                    cexRow=0.3, cexCol=0.2,     # Amend row and column label fonts
                    xlab = paste0("Number of samples: PSP = ", length(psp_cols), ", Control = ", length(control_cols))
                    )
dev.off() 
## png 
##   2

3.7.5 Clustering AD-PSP

Prepare an object for clustering (gather AD and Control samples in one data frame).

all_samples_ad_and_psp_tr <- cpm(dgeObj, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(ad_cols, psp_cols))) %>%
  `colnames<-`(c(make.unique(rep("AD", length(ad_cols))),
                 make.unique(rep("PSP", length(control_cols))))) %>%
  `rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique) 

Transform to matrix

## data.matrix() leaves numeric data as is.
ad_psp_tr_matrix <- data.matrix(all_samples_ad_and_psp_tr) 
ad_psp_tr_scaled <- ad_psp_tr_matrix %>% t %>% scale %>% t 

Euclidean distance between experiments: AD & Control

ad_psp_dist_sampl <- ad_psp_tr_scaled %>% t %>%
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Euclidean distance between proteins: AD & PSP

ad_psp_dist_tr <- ad_psp_tr_scaled %>% 
  dist(., method = "euclidean", diag = FALSE, upper = FALSE) 

Hierarchical Ward’s clustering

ad_psp_hclust_sampl <- hclust(ad_psp_dist_sampl, method = "ward.D2", members = NULL)
ad_psp_hclust_tr <- hclust(ad_psp_dist_tr, method = "ward.D2", members = NULL)


png("./figs_transcr/hclust_ad_psp.png", width = 2500, height = 3000, res = 300)
par(mfrow = c(2,1))
ad_psp_hclust_sampl %>% plot(cex = 0.4, main = "Conditions. Number of samples: AD = 82, PSP = 84")
ad_psp_hclust_tr %>% plot(cex = 0.6, main = "Proteins. Number of samples: AD = 82, PSP = 84")
dev.off() 
## png 
##   2

3.7.6 Heatmap AD-PSP

# Set colours for heatmap, 25 increments
my_palette <- colorRampPalette(c("blue","white","red"))(n = 25)

png("./figs_transcr/heatmap_ad_psp.png", width = 2000, height = 2000, res = 300)
# Plot heatmap with heatmap.2
par(cex.main=0.75) # Shrink title fonts on plot
ad_psp_tr_scaled %>%
  # Plot heatmap
  gplots::heatmap.2(.,                     # Tidy, normalised data
                    Colv=as.dendrogram(ad_psp_hclust_sampl),     # Experiments clusters in cols
                    Rowv=as.dendrogram(ad_psp_hclust_tr),     # Protein clusters in rows
                    revC=TRUE,                  # Flip plot to match pheatmap
                    density.info="histogram",   # Plot histogram of data and colour key
                    trace="none",               # Turn of trace lines from heat map
                    col = my_palette,           # Use my colour scheme
                    cexRow=0.3, cexCol=0.2,     # Amend row and column label fonts
                    xlab = paste0("Number of samples: AD = ", length(ad_cols), ", PSP = ", length(psp_cols))
                    )
dev.off() 
## png 
##   2

3.8 Venn diagram for significantly DE transcripts

3.8.1 All transcripts

## venn_list_tr <- list("AD vs Control" =  topTags(lrt.AvsC, sort.by = "logFC",
##                                                 p.value = 0.05, n = nrow(lrt.AvsC)) %>%
##                        rownames() %>%
##                        noquote() %>%
##                        gconvert(organism="hsapiens", target="ENSG") %>%
##                        pull(name),
##                      "PSP vs Control" = topTags(lrt.PvsC, sort.by = "logFC",
##                                                 p.value = 0.05, n = nrow(lrt.PvsC)) %>%
##                        rownames() %>%
##                        noquote() %>%
##                        gconvert(organism="hsapiens", target="ENSG") %>%
##                        pull(name),
##                      "AD vs PSP" = topTags(lrt.AvsP, sort.by = "logFC",
##                                            p.value = 0.05, n = nrow(lrt.AvsP)) %>%
##                        rownames() %>%
##                        noquote() %>%
##                        gconvert(organism="hsapiens", target="ENSG") %>%
##                        pull(name))

## saveRDS(venn_list_tr, "./results_transcr/venn_list_tr.rds")

venn_list_tr <- readRDS("./results_transcr/venn_list_tr.rds")

# Prevent the output of a log file
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL
# Create a venn diagram object
tr_venn <- venn.diagram(venn_list_tr,
                          NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = "Pairwise significantly DE transctipts identified in the experiment",
                          main.pos = c(0.5, 1.05),
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent") # Show both numbers and percent
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn))
dev.off() 
## png 
##   2

## venn_list_tr_2 <- list(
##   "AD-C +"  =  topTags(lrt.AvsC, sort.by = "logFC",
##                        p.value = 0.05, n = nrow(lrt.AvsC)) %>%
##     data.frame() %>%
##     filter(logFC > 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name),
##   "AD-C -"  =  topTags(lrt.AvsC, sort.by = "logFC",
##                        p.value = 0.05, n = nrow(lrt.AvsC)) %>%
##     data.frame() %>%
##     filter(logFC < 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name),
##   "P-C +" = topTags(lrt.PvsC, sort.by = "logFC",
##                     p.value = 0.05, n = nrow(lrt.PvsC)) %>%
##     data.frame() %>%
##     filter(logFC > 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name),
##   "P-C -" = topTags(lrt.PvsC, sort.by = "logFC",
##                     p.value = 0.05, n = nrow(lrt.PvsC)) %>%
##     data.frame() %>%
##     filter(logFC < 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name),
##   "AD-PSP +" = topTags(lrt.AvsP, sort.by = "logFC",
##                        p.value = 0.05, n = nrow(lrt.AvsP)) %>%
##     data.frame() %>%
##     filter(logFC > 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name),
##   "AD-PSP -" = topTags(lrt.AvsP, sort.by = "logFC",
##                        p.value = 0.05, n = nrow(lrt.AvsP)) %>%
##     data.frame() %>%
##     filter(logFC < 0) %>%
##     rownames() %>%
##     noquote() %>%
##     gconvert(organism="hsapiens", target="ENSG") %>%
##     pull(name)
## )

## saveRDS(venn_list_tr_2, "./results_transcr/venn_list_tr_2.rds")

venn_list_tr_2 <- readRDS("./results_transcr/venn_list_tr_2.rds") 

3.8.2 Upregulated transcripts

tr_venn_up <- venn.diagram(venn_list_tr_2[c(1,3,5)],NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = "Pairwise significantly upregulated transcripts (adj.p<0.05) identified in the experiment",
                          main.pos = c(0.5, 1.05),
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent") # Show both numbers and percent
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn_up.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn_up))
dev.off()
## png 
##   2

3.8.3 Downregulated transcripts

tr_venn_down <- venn.diagram(venn_list_tr_2[c(2,4,6)],NULL,
                          col = "transparent",
                          fill = c("cornflowerblue", "green", "yellow"),
                          alpha = 0.50,
                          cex = 0.8,
                          fontfamily = "sans",
                          fontface = "bold",
                          cat.col = c("darkblue", "darkgreen", "orange"),
                          cat.cex = 0.8,
                          cat.fontfamily = "sans",
                          margin = 0.2,
                          main = "Pairwise significantly downregulated transcripts (adj.p<0.05) identified in the experiment",
                          main.pos = c(0.5, 1.05),
                          main.fontfamily = "sans",
                          sub = "Number of samples: AD = 82, PSP = 84, Control = 31",
                          sub.pos = c(0.5, 0.92),
                          sub.cex = 0.8,
                          sub.fontfamily = "sans",
                          print.mode = c("raw","percent") # Show both numbers and percent
                          )

# Plot the venn diagram using the gridExtra package
png("./figs_transcr/venn_down.png", width = 2000, height = 2000, res = 300)
grid.arrange(gTree(children = tr_venn_down))
dev.off()
## png 
##   2

3.9 Enchanced Volcano plots

EnhancedVolcano(lrt.AvsC %>% data.frame,
                x = "logFC",
                y = "PValue",
                lab = gene_ids_tr$GeneID,
                title = 'AD versus Control',
                subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
                pCutoff = 0.05,
                FCcutoff = 2,
                pointSize = 0.5,
                axisLabSize = 10,
                labSize = 2.0)

ggsave("./figs_transcr/ad_vs_control_volcano.png") 
## Saving 7 x 5 in image
EnhancedVolcano(lrt.PvsC %>% data.frame,
                x = "logFC",
                y = "PValue",
                lab = gene_ids_tr$GeneID,
                title = 'PSP versus Control',
                subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
                pCutoff = 0.05,
                FCcutoff = 1.9,
                pointSize = 0.5,
                axisLabSize = 10,
                labSize = 2.0)

ggsave("./figs_transcr/psp_vs_control_volcano.png")
## Saving 7 x 5 in image
EnhancedVolcano(lrt.AvsP %>% data.frame,
                x = "logFC",
                y = "PValue",
                lab = gene_ids_tr$GeneID,
                title = 'AD versus PSP',
                subtitle = "Number of samples: AD = 82, PSP = 84, Control = 31",
                pCutoff = 0.05,
                FCcutoff = 1,
                pointSize = 0.5,
                axisLabSize = 10,
                labSize = 2.0)

ggsave("./figs_transcr/ad_vs_psp_volcano.png") 
## Saving 7 x 5 in image

3.10 Boxplots

3.10.1 AD vs Control

cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
  `rownames<-`(AD_vs_Control_GeneIDs$GeneID %>% make.unique) %>%
  t %>% data.frame %>%
  dplyr::select(1:25) %>%
  mutate(condition = c(rep("AD", length(ad_cols)),
                       rep("Control", length(control_cols)),
                       rep("PSP", length(psp_cols)))) %>%
  filter(condition != "PSP") %>%
  ## For ggplot, you need to convert the data from wide format to long format.
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  ## gglot uses the "condition" column to group boxplots: fill=condition in aes().
  ggplot(aes(protein, level, fill=condition)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("AD vs Control top 25 significant genes. Number of samples: AD = 82, Control = 31") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_ad_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 

3.10.2 PSP vs Control

cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% psp_vs_control_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
  `rownames<-`(PSP_vs_Control_GeneIDs$GeneID %>% make.unique) %>%
  t %>% data.frame %>%
  dplyr::select(1:25) %>%
  mutate(condition = c(rep("AD", length(ad_cols)),
                       rep("Control", length(control_cols)),
                       rep("PSP", length(psp_cols)))) %>%
  filter(condition != "AD") %>%
  ## For ggplot, you need to convert the data from wide format to long format.
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  ## gglot uses the "condition" column to group boxplots: fill=condition in aes().
  mutate(conditions = factor(condition, levels = c("PSP", "Control"))) %>%
  ggplot(aes(protein, level, fill=conditions)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("PSP vs Control top 25 significant genes. Number of samples: PSP = 84, Control = 31") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_psp_vs_control.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 

3.10.3 AD vs PSP

cpm(dgeObj, log = TRUE, normalized.lib.sizes = TRUE) %>%
  data.frame() %>%
  filter(rownames(.) %in% ad_vs_psp_top_tr$TranscriptIDs) %>%
  dplyr::select(all_of(c(ad_cols, control_cols, psp_cols))) %>%
  `rownames<-`(AD_vs_PSP_GeneIDs$GeneID %>% make.unique) %>%
  t %>% data.frame %>%
  dplyr::select(1:25) %>%
  mutate(condition = c(rep("AD", length(ad_cols)),
                       rep("Control", length(control_cols)),
                       rep("PSP", length(psp_cols)))) %>%
  filter(condition != "Control") %>%
  ## For ggplot, you need to convert the data from wide format to long format.
  pivot_longer(-condition, names_to = "protein", values_to = "level") %>%
  ## gglot uses the "condition" column to group boxplots: fill=condition in aes().
  ggplot(aes(protein, level, fill=condition)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  xlab("") +
  ylab("log2 expression level") +
  ggtitle("AD vs PSP top 25 significant genes. Number of samples: AD = 82, PSP = 84") +
  theme_light() +
  theme(plot.title = element_text(size = 16, face = "bold"))
ggsave("./figs_transcr/boxplot_ad_vs_psp.png", dpi = 300, scale = 2, width = 6, height = 3, units = "in") 

4 Correlation Analysis

We use results obtained earlier.

4.1 A function to format the correlation matrix (for saving in CSV)

From: A simple function to format the correlation matrix

# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
} 

4.2 Proteins

We select from the normalized data and from the normalized and log2-transformed protein data only those samples that are also in the RNA data.

4.2.1 Selecting top AD proteins from samples (all, AD, PSP, Control).

ccol <- c(1:31) # Control columns in 'data_all'
acol <- c(32:115) # AD columns in 'data_all'
pcol <- c(116:200) # PSP columns in 'data_all'

## We remove 3 samples that are absent in the transcriptomics.
acol <- acol[-c(29, 38)]
pcol <- pcol[-4] 
## AD proteins in AD samples
ad_pr_top <- data_all %>%
  `rownames<-`(.$PIDshort %>% make.unique) %>%
  filter(rownames(.) %in% rownames(AvsC_top)) %>%
  ## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
  dplyr::select(all_of(acol))

## AD proteins in PSP samples
psp_pr_top <- data_all %>%
  `rownames<-`(.$PIDshort %>% make.unique) %>%
  filter(rownames(.) %in% rownames(AvsC_top)) %>%
  ## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
  dplyr::select(all_of(pcol))

## AD proteins in Control samples
control_pr_top <- data_all %>%
  `rownames<-`(.$PIDshort %>% make.unique) %>%
  filter(rownames(.) %in% rownames(AvsC_top)) %>%
  ## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
  dplyr::select(all_of(ccol)) 

4.2.2 AD top proteins in all samples

## data_all contains log2 transformed data
ad_pr_top_in_all <- data_all %>%
  `rownames<-`(.$PIDshort %>% make.unique) %>%
  filter(rownames(.) %in% rownames(AvsC_top)) %>%
  ## filter(rownames(.) %in% ad_vs_control_prot$PIDshort) %>%
  dplyr::select(all_of(c(acol, pcol, ccol))) 
ad_pr_top_cor_all <- rcorr(t(as.matrix(ad_pr_top_in_all)), t(as.matrix(ad_pr_top_in_all)),
                     type = "spearman") 
png("./figs_cor/prot_ad_in_all.png", width = 2000, height = 2000, res = 300)
corrplot(ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
         p.mat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
         sig.level = 0.05, insig = "blank",
         ## type="upper", order="hclust",
         col = colorRampPalette(c("darkblue", "white", "green"))(200),
         tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.

png("./figs_cor/prot_ad_in_all_numb.png", width = 2000, height = 2000, res = 300)
corrplot(ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
         p.mat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)],
         sig.level = 0.05, insig = "blank",
         ## type="upper", order="hclust",
         method = "number", number.cex = 0.2,
         col = colorRampPalette(c("darkblue", "white", "green"))(200),
         tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

4.2.2.1 Save to CSV

Save correlation coefficients.

ad_top_r_all <- ad_pr_top_cor_all$r[1:nrow(ad_pr_top), 1:nrow(ad_pr_top)] %>%
  data.frame() %>%
  filter(rownames(.) %in% rownames(AvsC_top))
  ## filter(rownames(.) %in% ad_vs_control_prot$PIDshort)

write.csv(ad_top_r_all, "./results_cor/ad_prot_cor_coeffs_all.csv") 

Save correlation coefficient’s alongside with p-values

write.csv(flattenCorrMatrix(cormat = ad_pr_top_cor_all$r[1:nrow(ad_pr_top),
                                                     1:nrow(ad_pr_top)],
                            pmat = ad_pr_top_cor_all$P[1:nrow(ad_pr_top),
                                                   1:nrow(ad_pr_top)]),
          "./results_cor/ad_prot_cor_coeffs+p-values_all.csv") 

4.3 Transcripts

transcripts_norm <- cpm(dgeObj, log = TRUE,
                        normalized.lib.sizes = TRUE) %>%
  data.frame() 

4.3.1 AD top genes in all samples

ad_all_tr_top <- transcripts_norm[c(ad_cols, psp_cols, control_cols)] %>%
  filter(rownames(.)  %in%  ad_vs_control_top_tr$TranscriptIDs) %>%
  `rownames<-`(rownames(all_samples_ad_vs_control_tr))

ad_tr_top_cor_all <- rcorr(t(as.matrix(ad_all_tr_top)),
                     t(as.matrix(ad_all_tr_top)),
                     type = "spearman") 
png("./figs_cor/transcr_ad_in_all.png", width = 2000, height = 2000, res = 300)
corrplot(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
                         1:nrow(all_samples_ad_vs_control_tr)],
         p.mat = ad_tr_top_cor_all$P[1:nrow(all_samples_ad_vs_control_tr),
                                 1:nrow(all_samples_ad_vs_control_tr)],
         ## type="upper", order="hclust",
         sig.level = 0.05, insig = "blank",
         col = colorRampPalette(c("darkblue", "white", "green"))(200),
         tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.

png("./figs_cor/transcr_ad_in_all_numb.png", width = 2000, height = 2000, res = 300)
corrplot(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
                         1:nrow(all_samples_ad_vs_control_tr)],
         p.mat = ad_tr_top_cor_all$P[1:nrow(all_samples_ad_vs_control_tr),
                                 1:nrow(all_samples_ad_vs_control_tr)],
           sig.level = 0.05, insig = "blank",
           ## type="upper", order="hclust",
           method = "number", number.cex = 0.2,
           col = colorRampPalette(c("darkblue", "white", "green"))(200),
           tl.col = "black", tl.cex = 0.4, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

4.3.1.1 Save to CSV

Save correlation coefficients.

write.csv(ad_tr_top_cor_all$r[1:nrow(all_samples_ad_vs_control_tr),
                          1:nrow(all_samples_ad_vs_control_tr)],
          "./results_cor/ad_transcripts_cor_r_all.csv")

## knitr::kable(ad_tr_top_cor$r[1:nrow(all_samples_ad_vs_control_tr), 1:nrow(all_samples_ad_vs_control_tr)], caption = "Correlation coefficients for top 25 AD genes") 

Save correlation coefficient’s p-values

write.csv(flattenCorrMatrix(cormat = ad_tr_top_cor_all$r,
                            pmat = ad_tr_top_cor_all$P),
          "./results_cor/ad_transcripts_cor_coeffs+p-values_all.csv")

## knitr::kable(ad25P, caption = "Correlation coefficient's p-values for top 25 AD proteins") 

4.4 Proteins vs Transcripts

4.4.1 AD top proteins and genes (all samples)

i <- nrow(ad_pr_top_in_all)
j <- nrow(ad_all_tr_top)

ad_pr_tr_cor_all <- rcorr(t(as.matrix(ad_all_tr_top)),
      t(as.matrix(ad_pr_top_in_all)),
      type = "spearman")


## write_rds(list(A=A, B=B), "./ab.rds")
## ab <- readRDS("./ab.rds")
## rcorr(t(T).t(P))$r[number_of_transcripts+1 : number_of_transcripts+number_of_proteins,
##                    1: number_of_transcripts] 
png("./figs_cor/trpr_ad_in_all.png", width = 2000, height = 3300, res = 300)
corrplot(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
         p.mat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j],
         ## type="upper", order="hclust",
         sig.level = 0.05, insig = "blank",
         col = colorRampPalette(c("darkblue", "white", "green"))(200),
         tl.col = "black", tl.cex = 0.3, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

In the above plot, correlations with p-value > 0.05 are shown as blank as they are considered insignificant.

png("./figs_cor/trpr_ad_in_all_numb.png", width = 2000, height = 3300, res = 300)
corrplot(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
         p.mat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j],
         ## type="upper", order="hclust",
         sig.level = 0.05, insig = "blank",
         method = "number", number.cex = 0.2,
         col = colorRampPalette(c("darkblue", "white", "green"))(200),
         tl.col = "black", tl.cex = 0.3, tl.srt = 45)
mytitle = "Number of samples: AD = 82, PSP = 84, Control = 31"
mtext(side=3, line=2, at=-0.07, adj=0, cex=1, mytitle)
dev.off() 
## png 
##   2

4.4.1.1 Save to CSV

Save correlation coefficients.

write.csv(ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
          "./results_cor/ad_prot_transcr_cor_r_all.csv") 

Save correlation coefficient’s p-values

write.csv(flattenCorrMatrix(cormat = ad_pr_tr_cor_all$r[(j+1) : (i+j), 1:j],
                            pmat = ad_pr_tr_cor_all$P[(j+1) : (i+j), 1:j]),
          "./results_cor/ad_prot_transcr_cor_coeffs+p-values_all.csv")